Prep data

Load necessary packages and set Working Directory

# Bootstrap pacman (installing it on first run), then load/attach every package
# the analysis uses, installing any that are missing.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, zoo, lubridate, plotrix, ggpubr, caret, broom,
               kableExtra, reactable, effsize, install = TRUE)  # TRUE, not the reassignable shorthand T
# NOTE(review): absolute setwd() ties the script to one machine — consider an
# RStudio project or here::here() instead.
setwd("/Users/stevenmesquiti/Desktop/CEO Project/Manuscript analyses")

Define aesthetics

# Named color map for the two corpora plotted throughout (CEO vs Reddit).
colors <- c("CEO" = "dodgerblue3", "Reddit" = "red")
# Additional palettes used by later figures.
palette_map <- c("#3B9AB2", "#EBCC2A", "#F21A00")
palette_condition <- c("#ee9b00", "#bb3e03", "#005f73")

# Shared ggplot theme: classic look, legend on top, Futura Medium text, bold
# centered 20pt titles. The original chained six theme() calls whose duplicated
# settings overrode each other (plot.title 16 then 20; axis.text 16 then 14)
# and left a trailing empty argument in the first theme() call; this keeps only
# the final effective values in a single call.
plot_aes <- theme_classic() +
  theme(legend.position = "top",
        legend.text = element_text(size = 10),
        text = element_text(size = 16, family = "Futura Medium"),
        axis.text = element_text(size = 14),
        axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title = element_text(size = 20, face = "bold"),
        plot.title.position = "plot",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 20))

Write our Table Functions

#' Summarize Welch's t-tests of baseline (month 0) vs each later month.
#'
#' @param ttest_list List of `htest` objects, one per comparison
#'   (month 0 vs months 1..length(ttest_list)).
#' @return A kableExtra-styled table of t-statistics and p-values.
baseline_ttest <- function(ttest_list) {
  n_tests <- length(ttest_list)  # generalizes the previously hard-coded 24
  ttest_df <- data.frame(
    Group1 = rep(0, n_tests),
    Group2 = seq_len(n_tests),
    t = sapply(ttest_list, function(x) paste0("t(", round(x$parameter, 3), ") = ", round(x$statistic, 3))),
    p_value = sapply(ttest_list, function(x) x$p.value)
  )

  # Format p-values in scientific notation (TRUE, not the reassignable T)
  ttest_df$p_value <- format(ttest_df$p_value, scientific = TRUE)

  # Column labels kept byte-identical to the original rendered output
  colnames(ttest_df) <- c("t", "t + 1 ", "t-statistic", "p-value")

  kable(ttest_df, caption = "Summary of Welch's t-Tests", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}

#' Summarize Welch's t-tests of adjacent post-pandemic months (t vs t + 1).
#'
#' @param ttest_list List of `htest` objects for consecutive month pairs.
#' @param start First month index of the window (default 12 = 2020-03),
#'   added backward-compatibly so the window is no longer hard-coded.
#' @return A kableExtra-styled table of t-statistics and p-values.
post_pandemic_summary <- function(ttest_list, start = 12) {
  n_tests <- length(ttest_list)
  ttest_df <- data.frame(
    Group1 = seq(start, length.out = n_tests),
    Group2 = seq(start + 1, length.out = n_tests),
    t = sapply(ttest_list, function(x) paste0("t(", round(x$parameter, 3), ") = ", round(x$statistic, 3))),
    p_value = sapply(ttest_list, function(x) x$p.value)
  )

  # Format p-values in scientific notation (TRUE, not the reassignable T)
  ttest_df$p_value <- format(ttest_df$p_value, scientific = TRUE)

  # Column labels kept byte-identical to the original rendered output
  colnames(ttest_df) <- c("t", "t + 1 ", "t-value", "p-value")

  kable(ttest_df, caption = "Summary of Welch's t-Tests", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}



#' Summarize Cohen's d of baseline (month 0) vs each later month.
#'
#' @param cohen_d_list List of `effsize::cohen.d` results
#'   (month 0 vs months 1..length(cohen_d_list)).
#' @return A kableExtra-styled table of effect sizes.
baseline_cohen_d <- function(cohen_d_list) {
  n_tests <- length(cohen_d_list)  # generalizes the previously hard-coded 24
  cohen_d_df <- data.frame(
    Group1 = rep(0, n_tests),
    Group2 = seq_len(n_tests),
    Cohen_d = sapply(cohen_d_list, function(x) x$estimate)
  )

  # Column labels kept byte-identical to the original rendered output
  colnames(cohen_d_df) <- c("t", "t + 1", "Cohen's d")

  kable(cohen_d_df, caption = "Summary of Cohen's D", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}

#' Summarize Cohen's d of adjacent post-pandemic months (t vs t + 1).
#'
#' @param cohen_d_list List of `effsize::cohen.d` results for consecutive pairs.
#' @param start First month index of the window (default 12 = 2020-03),
#'   added backward-compatibly so the window is no longer hard-coded.
#' @return A kableExtra-styled table of effect sizes.
post_cohen_d <- function(cohen_d_list, start = 12) {
  n_tests <- length(cohen_d_list)
  cohen_d_df <- data.frame(
    Group1 = seq(start, length.out = n_tests),
    Group2 = seq(start + 1, length.out = n_tests),
    Cohen_d = sapply(cohen_d_list, function(x) x$estimate)
  )

  # Column labels kept byte-identical to the original rendered output
  colnames(cohen_d_df) <- c("t", "t+1", "Cohen's d")

  kable(cohen_d_df, caption = "Summary of Cohen's D", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}

#' Summarize raw mean differences of baseline (month 0) vs each later month.
#'
#' @param mean_diff_list Numeric vector of mean differences
#'   (month 0 minus months 1..length(mean_diff_list)).
#' @return A kableExtra-styled table of mean differences.
baseline_mean_diff <- function(mean_diff_list) {
  n_tests <- length(mean_diff_list)  # generalizes the previously hard-coded 24
  mean_diff_df <- data.frame(
    Group1 = rep(0, n_tests),
    Group2 = seq_len(n_tests),
    mean_diff = mean_diff_list
  )

  # Column labels kept byte-identical to the original rendered output
  colnames(mean_diff_df) <- c("t", "t+1", "Mean Difference")

  kable(mean_diff_df, caption = "Summary of Mean Differences", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}


#' Summarize raw mean differences of adjacent post-pandemic months (t vs t + 1).
#'
#' @param mean_diff_list Numeric vector of mean differences for consecutive pairs.
#' @param start First month index of the window (default 12 = 2020-03),
#'   added backward-compatibly so the window is no longer hard-coded.
#' @return A kableExtra-styled table of mean differences.
post_mean_diff <- function(mean_diff_list, start = 12) {
  n_tests <- length(mean_diff_list)
  mean_diff_df <- data.frame(
    Group1 = seq(start, length.out = n_tests),
    Group2 = seq(start + 1, length.out = n_tests),
    mean_diff = mean_diff_list
  )

  # Column labels kept byte-identical to the original rendered output
  colnames(mean_diff_df) <- c("t", "t+1", "Mean Difference")

  kable(mean_diff_df, caption = "Summary of Mean Differences", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}

Load in the CEO Data

# Read the LIWC-scored CEO transcripts straight from GitHub.
data <- read_csv("https://raw.githubusercontent.com/scm1210/Language_Lab_Repro/main/Big_CEO.csv")

# Keep the COVID study window (inclusive). String-vs-Date comparison is
# coerced by R's Ops.Date method.
data <- data["2019-03-01" <= data$Date & data$Date <= "2021-04-01", ]

nonclean <- nrow(data)  # row count before exclusions

# Word-count exclusion criteria: keep 25 <= WC <= 5400
# (single filter() call replaces the original pair of chained filters).
data <- data %>% filter(WC <= 5400, WC >= 25)

clean <- nrow(data)  # row count after exclusions

nonclean - clean  # number of excluded transcripts
## [1] 191
data$month_year <- format(as.Date(data$Date), "%Y-%m")  # month-level grouping key

# Keep only the modeled LIWC variables and build time predictors:
# time_month approximates months since 2019-03-01 (days / 30), and
# time_month_quad is its square for the quadratic growth term.
data_tidy <- data %>% dplyr::select(Date, Speaker, Analytic, cogproc,allnone,we,i,emo_anx) %>%
  mutate(Date = lubridate::ymd(Date),
         time_month = as.numeric(Date - ymd("2019-03-01")) / 30, # centering at start of March 2019
         time_month_quad = time_month * time_month) # quadratic term

data_tidy$Date_off <- floor(data_tidy$time_month) # round DOWN to whole months with floor() (0 = 2019-03, 24 = 2021-04); original comment said "ceiling" but floor() is used
data_tidy$Date_covid <- as.factor(data_tidy$Date_off) # factor version for group comparisons

Load in Reddit data

# Read the LIWC-scored Reddit (BLM) comparison corpus from a local Dropbox path.
# NOTE(review): machine-specific path — will not run elsewhere.
reddit <- read_csv("/Users/stevenmesquiti/Dropbox/CEO-data/LIWC22-data/BLM_LIWC22_cleaned.csv")

# Month-level grouping key, matching the CEO data's month_year.
reddit <- reddit %>%
  mutate(month_year = format(Date, "%Y-%m"))

# Same word-count exclusion criteria as the CEO data (25 <= WC <= 5400).
reddit <- reddit %>% filter(WC<=5400)   %>% 
  filter(WC>=25)




# Keep only shared variables and build the linear time predictor.
# (No quadratic term is created here, unlike data_tidy — the original
# "making our quadratic term" comment was copied over in error.)
reddit_tidy <- reddit %>% dplyr::select(Date, Analytic, cogproc,we,i) %>%
  mutate(Date = lubridate::ymd(Date),
         time_month = as.numeric(Date - ymd("2019-03-01")) / 30) # months since 2019-03-01 (days / 30)


reddit_tidy$Date_off <- floor(reddit_tidy$time_month) # round DOWN to whole months with floor() (0 = 2019-03, 24 = 2021-04)
reddit_tidy$Date_covid <- as.factor(reddit_tidy$Date_off) # factor version for group comparisons

Create Tidy Data for Graphs

# Re-read the full CEO dataset for the graphs and apply the same exclusions.
df <- read_csv("https://raw.githubusercontent.com/scm1210/Language_Lab_Repro/main/Big_CEO.csv")
df <- df %>% filter(WC <= 5400, WC >= 25)

# Month-level key for the monthly graphs (separate variable so Date is untouched).
df$month_year <- format(as.Date(df$Date), "%Y-%m")

# Monthly means and standard errors for every plotted LIWC variable.
# `funs()` is defunct in current dplyr; `list(name = fun)` produces the same
# `<var>_mean` / `<var>_std.error` column names. std.error() is plotrix's.
df2 <- df %>%
  group_by(month_year) %>%
  summarise_at(vars("Date", "WC", "Analytic", "Clout", "Drives", "cogproc",
                    "focuspast", "focuspresent", "focusfuture", "power",
                    "allure", "we", "i", "insight", "emo_anx", "allnone"),
               list(mean = mean, std.error = std.error))

# Restrict the plotted window to the study period.
df2 <- df2["2019-01" <= df2$month_year & df2$month_year <= "2021-03", ]



# Monthly means and standard errors of the Reddit LIWC variables
# (produces e.g. Analytic_mean / Analytic_std.error, matching df2's naming).
# `funs()` is defunct; `list(name = fun)` is the supported replacement, and the
# original call's trailing empty argument is removed.
reddit_tidy_2 <- reddit %>%
  group_by(month_year) %>%
  summarise_at(vars("Date", "WC", "Analytic", "cogproc", "we", "i"),
               list(mean = mean, std.error = std.error))

Write our Stats Functions

We were interested in how language changed relative to baseline one year pre-pandemic, as well as how language changed after the Pandemic.

As a result, we ran two separate sets of analyses: one comparing t (time zero) to each t[i], and one comparing each month from t (12 months after our centered data point) to t + 1. The groups you see are centered on 03/2019. That is, 12 = 03/2020, 13 = 04/2020, etc.

Analytic Thinking

# Welch's t-test on Analytic between two levels of data_tidy$Date_covid
# (0 = 2019-03 ... 24 = 2021-04). Reads the global data_tidy.
analytic_my.t <- function(fac1, fac2) {
  t.test(data_tidy$Analytic[data_tidy$Date_covid == fac1],
         data_tidy$Analytic[data_tidy$Date_covid == fac2])
}

# Cohen's d (effsize::cohen.d) for the same pair of months.
analytic_my.d <- function(fac1, fac2) {
  cohen.d(data_tidy$Analytic[data_tidy$Date_covid == fac1],
          data_tidy$Analytic[data_tidy$Date_covid == fac2])
}

# Raw mean difference in Analytic: month fac1 minus month fac2.
analytic_mean <- function(fac1, fac2) {
  mean(data_tidy$Analytic[data_tidy$Date_covid == fac1]) -
    mean(data_tidy$Analytic[data_tidy$Date_covid == fac2])
}

Cognitive Processing

# Welch's t-test on cogproc between two levels of data_tidy$Date_covid
# (0 = 2019-03 ... 24 = 2021-04). Reads the global data_tidy.
cogproc_my.t <- function(fac1, fac2) {
  t.test(data_tidy$cogproc[data_tidy$Date_covid == fac1],
         data_tidy$cogproc[data_tidy$Date_covid == fac2])
}

# Cohen's d (effsize::cohen.d) for the same pair of months.
cogproc_my.d <- function(fac1, fac2) {
  cohen.d(data_tidy$cogproc[data_tidy$Date_covid == fac1],
          data_tidy$cogproc[data_tidy$Date_covid == fac2])
}

# Raw mean difference in cogproc: month fac1 minus month fac2.
cogproc_mean <- function(fac1, fac2) {
  mean(data_tidy$cogproc[data_tidy$Date_covid == fac1]) -
    mean(data_tidy$cogproc[data_tidy$Date_covid == fac2])
}

I-words

# Welch's t-test on i-words between two levels of data_tidy$Date_covid
# (0 = 2019-03 ... 24 = 2021-04). Reads the global data_tidy.
i_my.t <- function(fac1, fac2) {
  t.test(data_tidy$i[data_tidy$Date_covid == fac1],
         data_tidy$i[data_tidy$Date_covid == fac2])
}

# Cohen's d (effsize::cohen.d) for the same pair of months.
i_my.d <- function(fac1, fac2) {
  cohen.d(data_tidy$i[data_tidy$Date_covid == fac1],
          data_tidy$i[data_tidy$Date_covid == fac2])
}

# Raw mean difference in i-words: month fac1 minus month fac2.
i_mean <- function(fac1, fac2) {
  mean(data_tidy$i[data_tidy$Date_covid == fac1]) -
    mean(data_tidy$i[data_tidy$Date_covid == fac2])
}

We-words

# Welch's t-test on we-words between two levels of data_tidy$Date_covid
# (0 = 2019-03 ... 24 = 2021-04). Reads the global data_tidy.
we_my.t <- function(fac1, fac2) {
  t.test(data_tidy$we[data_tidy$Date_covid == fac1],
         data_tidy$we[data_tidy$Date_covid == fac2])
}

# Cohen's d (effsize::cohen.d) for the same pair of months.
we_my.d <- function(fac1, fac2) {
  cohen.d(data_tidy$we[data_tidy$Date_covid == fac1],
          data_tidy$we[data_tidy$Date_covid == fac2])
}

# Raw mean difference in we-words: month fac1 minus month fac2.
we_mean <- function(fac1, fac2) {
  mean(data_tidy$we[data_tidy$Date_covid == fac1]) -
    mean(data_tidy$we[data_tidy$Date_covid == fac2])
}

Tidy data

Data transformations

  • None

Exclusions

  • Excluded texts that were shorter than **25 words** or longer than **5,400 words**.

Summary of the Data

Range of Dates

# Sanity check: the cleaned data should span 2019-03-01 through 2021-04-01.
range(data$Date)
## [1] "2019-03-01" "2021-04-01"

Number of Speakers

# Number of unique speakers (CEOs) in the cleaned data.
# n_distinct() replaces the original select() %>% unique() %>% n() round-trip.
speakers <- data %>%
  dplyr::summarize(n = dplyr::n_distinct(Speaker)) %>%
  reactable::reactable(striped = TRUE)
speakers

Number of Transcripts

# Total number of transcripts (rows) in the cleaned data.
# The original select(1) before counting rows was a no-op and is dropped.
transcripts <- data %>%
  dplyr::summarize(n = n()) %>%
  reactable::reactable(striped = TRUE)
transcripts

Mean Word Count

# Mean transcript word count across the cleaned data.
word_count <- data %>%
  dplyr::summarize(mean = mean(WC)) %>%
  reactable::reactable(striped = TRUE)
word_count

How did language change after the Pandemic?

Analytic Thinking

T-test

analytic_ttest<- mapply(analytic_my.t,seq(12,23,1), seq(13,24,1),SIMPLIFY=F) # Welch t-test for each adjacent month pair t vs t+1 (12 = 2020-03 ... 24 = 2021-04)
post_pandemic_summary(analytic_ttest)
Summary of Welch’s t-Tests
t t + 1 t-value p-value
12 13 t(525.793) = 5.085 5.124345e-07
13 14 t(373.064) = -2.595 9.838752e-03
14 15 t(252.035) = -1.673 9.565479e-02
15 16 t(377.617) = 1.924 5.508471e-02
16 17 t(200.57) = -2.212 2.808412e-02
17 18 t(218.933) = -1.687 9.298455e-02
18 19 t(262.609) = 0.62 5.358364e-01
19 20 t(128.217) = 0.874 3.838664e-01
20 21 t(230.756) = -1.54 1.249802e-01
21 22 t(94.317) = 1.953 5.374259e-02
22 23 t(55.552) = -1.15 2.551600e-01
23 24 t(2141.372) = -1.718 8.595937e-02

Cohen’s D

analytic_d <- mapply(analytic_my.d,seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Cohen's d for each adjacent post-pandemic month pair
post_cohen_d(analytic_d)
Summary of Cohen’s D
t t+1 Cohen’s d
12 13 0.3274589
13 14 -0.1597933
14 15 -0.1320224
15 16 0.1935631
16 17 -0.1616992
17 18 -0.1481301
18 19 0.0709701
19 20 0.0898748
20 21 -0.1246402
21 22 0.2681803
22 23 -0.1598304
23 24 -0.0739462

Mean Differences

analytic_meandiff <- mapply(analytic_mean, seq(12,23,1), seq(13,24,1)) # raw mean difference for each adjacent post-pandemic month pair (NOT vs time zero, despite the original comment)
post_mean_diff(analytic_meandiff)
Summary of Mean Differences
t t+1 Mean Difference
12 13 4.734622
13 14 -2.190455
14 15 -1.844328
15 16 2.748318
16 17 -2.231753
17 18 -2.101267
18 19 1.158869
19 20 1.276462
20 21 -1.779122
21 22 4.065080
22 23 -2.075629
23 24 -0.994088

Cogproc

T-test

cogproc_ttest <-mapply(cogproc_my.t, seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Welch t-test for each adjacent month pair t vs t+1
post_pandemic_summary(cogproc_ttest)
Summary of Welch’s t-Tests
t t + 1 t-value p-value
12 13 t(534.573) = -4.316 1.892660e-05
13 14 t(366.536) = 1.405 1.609866e-01
14 15 t(257.865) = 4.019 7.665356e-05
15 16 t(367.3) = -3.132 1.877275e-03
16 17 t(199.239) = 0.987 3.249415e-01
17 18 t(223.61) = 4.18 4.177506e-05
18 19 t(285.883) = -1.198 2.317513e-01
19 20 t(133.619) = -1.493 1.378047e-01
20 21 t(234.846) = 3.211 1.508000e-03
21 22 t(87.346) = -1.705 9.183489e-02
22 23 t(55.376) = 0.997 3.232089e-01
23 24 t(2145.127) = -0.999 3.177001e-01

Cohen’s D

cogproc_d <-mapply(cogproc_my.d, seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Cohen's d for each adjacent post-pandemic month pair
post_cohen_d(cogproc_d)
Summary of Cohen’s D
t t+1 Cohen’s d
12 13 -0.2755415
13 14 0.0887056
14 15 0.3007241
15 16 -0.3204553
16 17 0.0732556
17 18 0.3435609
18 19 -0.1329353
19 20 -0.1294167
20 21 0.2476709
21 22 -0.2453381
22 23 0.1405453
23 24 -0.0429758

Mean Differences

cogproc_meandiff <- mapply(cogproc_mean, seq(12,23,1), seq(13,24,1)) # raw mean difference for each adjacent post-pandemic month pair (NOT vs time zero, despite the original comment)
post_mean_diff(cogproc_meandiff)
Summary of Mean Differences
t t+1 Mean Difference
12 13 -0.6107287
13 14 0.1784774
14 15 0.6094504
15 16 -0.6540232
16 17 0.1559844
17 18 0.7442075
18 19 -0.2962170
19 20 -0.2746360
20 21 0.5304979
21 22 -0.5357971
22 23 0.2775877
23 24 -0.0886600

I-words

T-test

i_ttest <- mapply(i_my.t, seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Welch t-test for each adjacent month pair t vs t+1
post_pandemic_summary(i_ttest)
Summary of Welch’s t-Tests
t t + 1 t-value p-value
12 13 t(477.851) = -5.103 4.841738e-07
13 14 t(362.97) = 2.968 3.193717e-03
14 15 t(261.205) = 2.735 6.660709e-03
15 16 t(336.981) = -3.589 3.805206e-04
16 17 t(191.52) = 1.761 7.976208e-02
17 18 t(240.733) = 3.439 6.870032e-04
18 19 t(255.111) = -2.602 9.812584e-03
19 20 t(134.906) = 0.45 6.532009e-01
20 21 t(248.773) = 1.506 1.333518e-01
21 22 t(84.284) = 2.016 4.699962e-02
22 23 t(57.559) = -3.807 3.436805e-04
23 24 t(2135.84) = 4.409 1.087616e-05

Cohen’s D

i_d <- mapply(i_my.d,seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Cohen's d for each adjacent post-pandemic month pair
post_cohen_d(i_d)
Summary of Cohen’s D
t t+1 Cohen’s d
12 13 -0.3467518
13 14 0.1902125
14 15 0.1990807
15 16 -0.3757604
16 17 0.1451672
17 18 0.2369631
18 19 -0.3007221
19 20 0.0377993
20 21 0.1020099
21 22 0.2971566
22 23 -0.4621942
23 24 0.1900173

Mean Differences

i_meandiff <- mapply(i_mean,seq(12,23,1), seq(13,24,1)) # raw mean difference for each adjacent post-pandemic month pair (NOT vs time zero, despite the original comment)
post_mean_diff(i_meandiff)
Summary of Mean Differences
t t+1 Mean Difference
12 13 -0.2878044
13 14 0.1550533
14 15 0.1624754
15 16 -0.3241516
16 17 0.1289192
17 18 0.2083141
18 19 -0.2363725
19 20 0.0329017
20 21 0.0885966
21 22 0.2292627
22 23 -0.3911951
23 24 0.1657095

We-words

T-test

we_ttest <- mapply(we_my.t, seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Welch t-test for each adjacent month pair t vs t+1
post_pandemic_summary(we_ttest)
Summary of Welch’s t-Tests
t t + 1 t-value p-value
12 13 t(527.076) = 4.104 4.708824e-05
13 14 t(378.819) = 0.912 3.625070e-01
14 15 t(253.139) = -3.323 1.023448e-03
15 16 t(373.961) = 2.465 1.416113e-02
16 17 t(197.518) = -0.338 7.360894e-01
17 18 t(229.495) = -4.276 2.793946e-05
18 19 t(262.602) = 2.551 1.130991e-02
19 20 t(131.794) = -0.142 8.871422e-01
20 21 t(238.212) = -1.94 5.361708e-02
21 22 t(84.062) = -0.295 7.685396e-01
22 23 t(55.764) = 0.856 3.958478e-01
23 24 t(2137.765) = -0.35 7.267188e-01

Cohen’s D

we_d <- mapply(we_my.d, seq(12,23,1), seq(13,24,1),SIMPLIFY=FALSE) # Cohen's d for each adjacent post-pandemic month pair
post_cohen_d(we_d)
Summary of Cohen’s D
t t+1 Cohen’s d
12 13 0.2639367
13 14 0.0549934
14 15 -0.2594704
15 16 0.2501259
16 17 -0.0255875
17 18 -0.3276203
18 19 0.2920369
19 20 -0.0129636
20 21 -0.1443587
21 22 -0.0435999
22 23 0.1169953
23 24 -0.0150573

Mean Differences

we_meandiff <- mapply(we_mean, seq(12,23,1), seq(13,24,1)) # raw mean difference for each adjacent post-pandemic month pair (NOT vs time zero, despite the original comment)
post_mean_diff(we_meandiff)
Summary of Mean Differences
t t+1 Mean Difference
12 13 0.3777932
13 14 0.0763380
14 15 -0.3676046
15 16 0.3649285
16 17 -0.0365235
17 18 -0.4710551
18 19 0.4168557
19 20 -0.0182846
20 21 -0.2041654
21 22 -0.0608833
22 23 0.1582888
23 24 -0.0209555

How did language change relative to baseline (one year before the pandemic; 03/2019)?

Analytic Thinking

T-test

analytic_ttest_baseline <-mapply(analytic_my.t,0, seq(1,24,1),SIMPLIFY=FALSE) # Welch t-test of baseline month 0 (2019-03) vs each later month 1..24
baseline_ttest(analytic_ttest_baseline)
Summary of Welch’s t-Tests
t t + 1 t-statistic p-value
0 1 t(1161.463) = 1.503 1.332353e-01
0 2 t(1036.849) = 0.686 4.928577e-01
0 3 t(245.143) = 0.251 8.021842e-01
0 4 t(1120.104) = 2.673 7.630544e-03
0 5 t(1004.801) = 0.479 6.323643e-01
0 6 t(280.425) = 1.034 3.018785e-01
0 7 t(1049.944) = 2.667 7.759826e-03
0 8 t(993.351) = 1.405 1.604652e-01
0 9 t(328.093) = 1.015 3.109746e-01
0 10 t(286.24) = 1.551 1.221201e-01
0 11 t(1061.639) = 1.974 4.866575e-02
0 12 t(1272.101) = 1.305 1.919959e-01
0 13 t(623.937) = 5.777 1.200948e-08
0 14 t(929.477) = 5.152 3.153290e-07
0 15 t(370.165) = 1.422 1.558977e-01
0 16 t(316.924) = 3.926 1.060657e-04
0 17 t(918.086) = 3.257 1.166437e-03
0 18 t(302.234) = 0.117 9.068413e-01
0 19 t(164.423) = 0.846 3.986233e-01
0 20 t(920.439) = 3.736 1.981471e-04
0 21 t(331.793) = 0.639 5.230612e-01
0 22 t(63.201) = 2.617 1.108971e-02
0 23 t(1111.951) = 3.769 1.727388e-04
0 24 t(1125.188) = 2.433 1.514789e-02

Cohen’s D

analytic_D_baseline <- mapply(analytic_my.d,0, seq(1,24,1),SIMPLIFY=FALSE) # Cohen's d of baseline month 0 vs each later month
baseline_cohen_d(analytic_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
0 1 0.0879752
0 2 0.0329980
0 3 0.0206107
0 4 0.1587215
0 5 0.0235235
0 6 0.0867045
0 7 0.1620807
0 8 0.0687147
0 9 0.0805849
0 10 0.1282654
0 11 0.1023933
0 12 0.0694416
0 13 0.3954264
0 14 0.2534133
0 15 0.1138341
0 16 0.3057368
0 17 0.1588173
0 18 0.0101558
0 19 0.0861013
0 20 0.1802980
0 21 0.0529819
0 22 0.3237240
0 23 0.2018620
0 24 0.1262979

Mean Differences

analytic_mean_baseline <- mapply(analytic_mean, 0, seq(1,24,1)) # mean difference of baseline month 0 (2019-03) vs each later month
baseline_mean_diff(analytic_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
0 1 1.3114081
0 2 0.4935284
0 3 0.3039970
0 4 2.3251490
0 5 0.3411544
0 6 1.3027809
0 7 2.3954214
0 8 0.9976299
0 9 1.1986758
0 10 1.9188652
0 11 1.4369448
0 12 1.0438407
0 13 5.7784625
0 14 3.5880071
0 15 1.7436794
0 16 4.4919977
0 17 2.2602447
0 18 0.1589776
0 19 1.3178462
0 20 2.5943085
0 21 0.8151869
0 22 4.8802673
0 23 2.8046380
0 24 1.8105501

Cogproc

T-test

cogproc_ttest_baseline <- mapply(cogproc_my.t, 0, seq(1,24,1),SIMPLIFY=FALSE) # Welch t-test of baseline month 0 (2019-03) vs each later month 1..24
baseline_ttest(cogproc_ttest_baseline)
Summary of Welch’s t-Tests
t t + 1 t-statistic p-value
0 1 t(1156.51) = -0.51 6.103480e-01
0 2 t(1035.97) = -0.718 4.730063e-01
0 3 t(218.72) = -0.239 8.112280e-01
0 4 t(1119.697) = -1.842 6.578607e-02
0 5 t(1051.938) = -0.376 7.067326e-01
0 6 t(282.794) = 0.244 8.072301e-01
0 7 t(1029.213) = -1.714 8.679890e-02
0 8 t(1076.642) = -0.954 3.403915e-01
0 9 t(320.307) = 1.045 2.970093e-01
0 10 t(255.259) = -0.817 4.147599e-01
0 11 t(1147.575) = -0.725 4.688845e-01
0 12 t(1307.905) = -2.028 4.276280e-02
0 13 t(609.245) = -5.701 1.854777e-08
0 14 t(924.043) = -6.591 7.328808e-11
0 15 t(395.995) = -0.386 7.000311e-01
0 16 t(298.221) = -4.081 5.758392e-05
0 17 t(949.003) = -5.465 5.916345e-08
0 18 t(310.668) = 0.926 3.549182e-01
0 19 t(184.738) = -0.58 5.628182e-01
0 20 t(936.808) = -3.799 1.544264e-04
0 21 t(341.615) = 0.764 4.454529e-01
0 22 t(61.973) = -1.382 1.719203e-01
0 23 t(1140.023) = -1.069 2.852706e-01
0 24 t(1172.335) = -1.859 6.323237e-02

Cohen’s D

cogproc_D_baseline <- mapply(cogproc_my.d, 0, seq(1,24,1),SIMPLIFY=FALSE) # Cohen's d of baseline month 0 vs each later month
baseline_cohen_d(cogproc_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
0 1 -0.0298959
0 2 -0.0345459
0 3 -0.0213194
0 4 -0.1093919
0 5 -0.0180369
0 6 0.0203613
0 7 -0.1048291
0 8 -0.0445936
0 9 0.0841121
0 10 -0.0731906
0 11 -0.0364241
0 12 -0.1070381
0 13 -0.3938811
0 14 -0.3255788
0 15 -0.0297828
0 16 -0.3291694
0 17 -0.2601030
0 18 0.0788773
0 19 -0.0527050
0 20 -0.1809343
0 21 0.0622160
0 22 -0.1777619
0 23 -0.0568265
0 24 -0.0951265

Mean Differences

# Mean difference of baseline month 0 (2019-03) vs each later month 1..24.
cogproc_mean_baseline <- mapply(cogproc_mean, 0, seq(1, 24, 1))
# BUG FIX: the original passed `cogproc_meandiff` (the post-pandemic
# adjacent-month differences) to the table, so the rendered baseline table
# showed the wrong, recycled values. Pass the baseline vector computed above.
baseline_mean_diff(cogproc_mean_baseline)
Summary of Mean Differences (NOTE: this rendered table is incorrect — `cogproc_meandiff`, the post-pandemic adjacent-month differences, was passed instead of `cogproc_mean_baseline`, which is why the 12 values below repeat twice)
t t+1 Mean Difference
0 1 -0.6107287
0 2 0.1784774
0 3 0.6094504
0 4 -0.6540232
0 5 0.1559844
0 6 0.7442075
0 7 -0.2962170
0 8 -0.2746360
0 9 0.5304979
0 10 -0.5357971
0 11 0.2775877
0 12 -0.0886600
0 13 -0.6107287
0 14 0.1784774
0 15 0.6094504
0 16 -0.6540232
0 17 0.1559844
0 18 0.7442075
0 19 -0.2962170
0 20 -0.2746360
0 21 0.5304979
0 22 -0.5357971
0 23 0.2775877
0 24 -0.0886600

I-words

T-test

i_ttest_baseline <- mapply(i_my.t, 0, seq(1,24,1),SIMPLIFY=FALSE) # Welch t-test of baseline month 0 (2019-03) vs each later month 1..24
baseline_ttest(i_ttest_baseline)
Summary of Welch’s t-Tests
t t + 1 t-statistic p-value
0 1 t(1143.818) = -3.345 8.495412e-04
0 2 t(1155.183) = -1.196 2.318220e-01
0 3 t(213.553) = -0.191 8.486000e-01
0 4 t(1114.307) = -4.144 3.672274e-05
0 5 t(1056.559) = -0.648 5.173329e-01
0 6 t(278.03) = -1.611 1.082868e-01
0 7 t(1035.231) = -3.353 8.273950e-04
0 8 t(1066.958) = -2.058 3.981213e-02
0 9 t(265.192) = -1.417 1.577272e-01
0 10 t(284.305) = -2.775 5.890772e-03
0 11 t(1154.305) = -1.985 4.739397e-02
0 12 t(1263.498) = -0.332 7.399444e-01
0 13 t(571.485) = -5.028 6.644118e-07
0 14 t(958.88) = -3.709 2.197939e-04
0 15 t(390.578) = 0.221 8.248697e-01
0 16 t(253.435) = -3.925 1.115955e-04
0 17 t(1005.422) = -4.473 8.580050e-06
0 18 t(350.624) = 0.413 6.794966e-01
0 19 t(180.598) = -2.646 8.864330e-03
0 20 t(986.111) = -4.378 1.326045e-05
0 21 t(371.13) = -1.322 1.869275e-01
0 22 t(63.336) = 1.351 1.815790e-01
0 23 t(1250.838) = -5.622 2.322252e-08
0 24 t(1254.797) = -1.893 5.857980e-02

Cohen’s D

i_D_baseline <- mapply(i_my.d, 0, seq(1,24,1),SIMPLIFY=FALSE) # Cohen's d of baseline month 0 vs each later month
baseline_cohen_d(i_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
0 1 -0.1965974
0 2 -0.0543981
0 3 -0.0173720
0 4 -0.2467407
0 5 -0.0309676
0 6 -0.1358241
0 7 -0.2047181
0 8 -0.0966976
0 9 -0.1296303
0 10 -0.2305339
0 11 -0.0995545
0 12 -0.0176937
0 13 -0.3562055
0 14 -0.1785725
0 15 0.0172266
0 16 -0.3536629
0 17 -0.2047237
0 18 0.0327380
0 19 -0.2453415
0 20 -0.2010721
0 21 -0.1028381
0 22 0.1664219
0 23 -0.2903836
0 24 -0.0945412

Mean Differences

i_mean_baseline <- mapply(i_mean, 0, seq(1,24,1)) # mean difference of baseline month 0 (2019-03, not 3/2020 as the original comment said) vs each later month
baseline_mean_diff(i_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
0 1 -0.1747670
0 2 -0.0504304
0 3 -0.0148774
0 4 -0.2082233
0 5 -0.0265697
0 6 -0.1159251
0 7 -0.1744079
0 8 -0.0846426
0 9 -0.1162156
0 10 -0.1958046
0 11 -0.0842683
0 12 -0.0149918
0 13 -0.3027962
0 14 -0.1477429
0 15 0.0147325
0 16 -0.3094191
0 17 -0.1804999
0 18 0.0278142
0 19 -0.2085583
0 20 -0.1756567
0 21 -0.0870600
0 22 0.1422027
0 23 -0.2489924
0 24 -0.0832828

We-words

T-test

we_ttest_baseline <- mapply(we_my.t, 0, seq(1,24,1),SIMPLIFY=FALSE) # Welch t-test of baseline month 0 (2019-03) vs each later month 1..24
baseline_ttest(we_ttest_baseline)
Summary of Welch’s t-Tests
t t + 1 t-statistic p-value
0 1 t(1161.884) = 0.572 5.675785e-01
0 2 t(1008.446) = 1.592 1.117125e-01
0 3 t(214.746) = -1.069 2.864739e-01
0 4 t(1116.226) = 0.615 5.384335e-01
0 5 t(979.103) = 0.94 3.476349e-01
0 6 t(280.316) = -1.18 2.391716e-01
0 7 t(1067.876) = -0.204 8.386752e-01
0 8 t(972.543) = 0.65 5.160283e-01
0 9 t(351.29) = -0.631 5.286168e-01
0 10 t(309.043) = -0.968 3.339559e-01
0 11 t(1073.791) = -0.927 3.542624e-01
0 12 t(1197.173) = -0.4 6.891035e-01
0 13 t(676.589) = 3.36 8.220450e-04
0 14 t(890.336) = 5.66 2.040178e-08
0 15 t(395.823) = 0.423 6.723924e-01
0 16 t(317.82) = 3.39 7.875779e-04
0 17 t(889.197) = 5.136 3.456716e-07
0 18 t(361.984) = -0.716 4.741820e-01
0 19 t(191.377) = 2.309 2.199015e-02
0 20 t(873.543) = 4.18 3.205482e-05
0 21 t(390.061) = 0.867 3.866454e-01
0 22 t(64.772) = 0.229 8.197829e-01
0 23 t(1081.131) = 2.543 1.113820e-02
0 24 t(1080.954) = 2.287 2.237292e-02

Cohen’s D

we_D_baseline <- mapply(we_my.d, 0, seq(1,24,1),SIMPLIFY=FALSE) # Cohen's d of baseline month 0 vs each later month
baseline_cohen_d(we_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
0 1 0.0334412
0 2 0.0777773
0 3 -0.0966754
0 4 0.0362120
0 5 0.0468851
0 6 -0.0989057
0 7 -0.0122764
0 8 0.0321927
0 9 -0.0482579
0 10 -0.0764371
0 11 -0.0478523
0 12 -0.0216259
0 13 0.2228626
0 14 0.2873740
0 15 0.0326963
0 16 0.2635803
0 17 0.2566654
0 18 -0.0557482
0 19 0.2039772
0 20 0.2102911
0 21 0.0657068
0 22 0.0270689
0 23 0.1373736
0 24 0.1204946

Mean Differences

we_mean_baseline <- mapply(we_mean, 0, seq(1,24,1)) # mean difference of baseline month 0 (2019-03, not 3/2020 as the original comment said) vs each later month
baseline_mean_diff(we_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
0 1 0.0530735
0 2 0.1226640
0 3 -0.1575023
0 4 0.0544833
0 5 0.0717923
0 6 -0.1604908
0 7 -0.0190853
0 8 0.0495303
0 9 -0.0765531
0 10 -0.1217559
0 11 -0.0731274
0 12 -0.0334520
0 13 0.3443412
0 14 0.4206792
0 15 0.0530747
0 16 0.4180032
0 17 0.3814797
0 18 -0.0895754
0 19 0.3272803
0 20 0.3089956
0 21 0.1048303
0 22 0.0439469
0 23 0.2022358
0 24 0.1812803

2019-2021 Graphs

Analytic Thinking

# Combined CEO-vs-Reddit monthly trajectory of Analytic Thinking, with a dashed
# line at pandemic onset (2020-03-01) and gray bands for the summer/winter 2020
# surges. NOTE(review): geom_rect(data = df2, ...) draws one translucent
# rectangle per row of df2, so the band's opacity is alpha * nrow(df2) stacked
# copies — confirm that is intentional before changing alpha = 0.009.
Analytic <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date_mean, y = Analytic_mean, color = "CEO"), # CEO series; color label mapped via scale_color_manual below
              linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date_mean,ymin=Analytic_mean-Analytic_std.error, ymax=Analytic_mean+Analytic_std.error, fill = "CEO"), alpha=0.2) + # +/- 1 SE band
geom_line(data = reddit_tidy_2, aes(x = Date_mean, y = Analytic_mean, color = "Reddit"), # Reddit comparison series
              linetype = 'solid') +
geom_ribbon(data = reddit_tidy_2, aes(x = Date_mean,ymin=Analytic_mean-Analytic_std.error, ymax=Analytic_mean+Analytic_std.error, fill = "Reddit"), alpha=0.2) +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  ggtitle("Analytic Thinking") +
  labs(x = "Month", y = 'Standardized score') +
  plot_aes + # shared theme defined at the top of the script
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 2) + # pandemic onset
  geom_rect(data = df2, # summer surge band
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, # winter surge band
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)

# Label the two surge bands.
Analytic <- Analytic + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=45,label="Summer 2020 surge", size = 5) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=47,label="Winter 2020 surge", size = 5) 

# Fix line/ribbon colors for the two corpora.
Analytic <- Analytic +
  scale_color_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))

Analytic

# CEO-only version of the Analytic Thinking trajectory (same annotations as the
# combined plot). NOTE(review): geom_rect(data = df2, ...) stacks one
# translucent rectangle per row of df2 — see note on the combined plot.
Analytic_CEO <- ggplot(data=df2, aes(x=Date_mean, y=Analytic_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  geom_ribbon(aes(ymin=Analytic_mean-Analytic_std.error, ymax=Analytic_mean+Analytic_std.error),fill = 'dodgerblue3', alpha=0.2) + # +/- 1 SE band
  ggtitle("Analytic Thinking") +
  labs(x = "Month", y = 'Standardized score') +
  plot_aes + # shared theme defined at the top of the script
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 2) + # pandemic onset
  geom_rect(data = df2, # summer surge band
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, # winter surge band
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)
Analytic_CEO <- Analytic_CEO + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=50,label="Summer 2020 surge", size = 3) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=43,label="Winter 2020 surge", size = 3)

Cogproc

# Combined CEO-vs-Reddit monthly trajectory of Cognitive Processing, with the
# pandemic-onset line and surge bands. NOTE(review): geom_rect(data = df2, ...)
# stacks one translucent rectangle per row of df2 — see note on the Analytic plot.
Cogproc <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date_mean, y = cogproc_mean, color = "CEO"), # CEO series; colors fixed via scale_color_manual below
              linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date_mean,ymin=cogproc_mean-cogproc_std.error, ymax=cogproc_mean+cogproc_std.error, fill = "CEO"), alpha=0.2) + # +/- 1 SE band
geom_line(data = reddit_tidy_2, aes(x = Date_mean, y = cogproc_mean, color = "Reddit"), # Reddit comparison series
              linetype = 'solid') +
geom_ribbon(data = reddit_tidy_2, aes(x = Date_mean,ymin=cogproc_mean-cogproc_std.error, ymax=cogproc_mean+cogproc_std.error, fill = "Reddit"), alpha=0.2) +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  ggtitle("Cognitive Processing") +
  labs(x = "Month", y = '% of Total Words') +
  plot_aes + # shared theme defined at the top of the script
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 2) + # pandemic onset
  geom_rect(data = df2, # summer surge band
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, # winter surge band
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)

# Label the two surge bands.
Cogproc <- Cogproc + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=12.2,label="Summer 2020 surge", size = 5) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=12.3,label="Winter 2020 surge", size = 5) 

# Fix line/ribbon colors for the two corpora.
Cogproc <- Cogproc +
  scale_color_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))

Cogproc

# CEO-only version of the Cognitive Processing trajectory.
# NOTE(review): unlike the other plots, the onset vline here uses linetype = 1
# (solid) rather than 2 (dashed) — confirm whether that is intentional.
CEO_Cogproc <- ggplot(data=df2, aes(x=Date_mean, y=cogproc_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  geom_ribbon(aes(ymin=cogproc_mean-cogproc_std.error, ymax=cogproc_mean+cogproc_std.error),fill = 'dodgerblue3', alpha=0.2) + # +/- 1 SE band
  ggtitle("Cognitive Processing") +
  labs(x = "Month", y = '% Total Words') +
  plot_aes + # shared theme defined at the top of the script
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 1) + # pandemic onset
  geom_rect(data = df2, # summer surge band
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, # winter surge band
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)
CEO_Cogproc <- CEO_Cogproc + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=12.5,label="Summer 2020 surge", size = 3) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=12.5,label="Winter 2020 surge", size = 3)

I words

# Combined CEO-vs-Reddit monthly trajectory of I-usage, with the pandemic-onset
# line and surge bands. NOTE(review): geom_rect(data = df2, ...) stacks one
# translucent rectangle per row of df2 — see note on the Analytic plot. Also
# note this overwrites the LIWC column name `i` as a plot object in the
# global environment.
i <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date_mean, y = i_mean, color = "CEO"), # CEO series; colors fixed via scale_color_manual below
              linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date_mean,ymin=i_mean-i_std.error, ymax=i_mean+i_std.error, fill = "CEO"), alpha=0.2) + # +/- 1 SE band
geom_line(data = reddit_tidy_2, aes(x = Date_mean, y = i_mean, color = "Reddit"), # Reddit comparison series
              linetype = 'solid') +
geom_ribbon(data = reddit_tidy_2, aes(x = Date_mean,ymin=i_mean-i_std.error, ymax=i_mean+i_std.error, fill = "Reddit"), alpha=0.2) +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  ggtitle("I-usage") +
  labs(x = "Month", y = '% of Total Words') +
  plot_aes + # shared theme defined at the top of the script
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 2) + # pandemic onset
  geom_rect(data = df2, # summer surge band
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, # winter surge band
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)

# Label the two surge bands.
i <- i + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=2.25,label="Summer 2020 surge", size = 5) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=1.95,label="Winter 2020 surge", size = 5) 

# Fix line/ribbon colors for the two corpora.
i <- i +
  scale_color_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))
i

# CEO-only monthly time series of "I" usage (means +/- 1 SE from df2).
CEO_i <- ggplot(data=df2, aes(x=Date_mean, y=i_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  # +/- 1 SE band around the monthly mean
  geom_ribbon(aes(ymin=i_mean-i_std.error, ymax=i_mean+i_std.error),fill = 'dodgerblue3', alpha=0.2) +
  ggtitle("I-usage") +
  labs(x = "Month", y = '% Total Words') +
  plot_aes + #here's our plot aes object
  # solid vertical line marks March 2020 (pandemic onset)
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 1) +
  # NOTE(review): geom_rect inherits data = df2 (one rect per row; alpha stacks)
  geom_rect(data = df2, #summer surge
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, #winter surge
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)
# Surge-window labels (smaller text than the combined figure)
CEO_i <- CEO_i + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=1.95,label="Summer 2020 surge", size = 3) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=1.95,label="Winter 2020 surge", size = 3)

We words

# CEO vs Reddit monthly time series of first-person-plural ("we") usage.
# NOTE(review): reddit_tidy_2 is not defined in this file's visible code —
# confirm it is created upstream.
we <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date_mean, y = we_mean, color = "CEO"), #set our colors 
              linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date_mean,ymin=we_mean-we_std.error, ymax=we_mean+we_std.error, fill = "CEO"), alpha=0.2) +
geom_line(data = reddit_tidy_2, aes(x = Date_mean, y = we_mean, color = "Reddit"), #set our colors 
              linetype = 'solid') +
geom_ribbon(data = reddit_tidy_2, aes(x = Date_mean,ymin=we_mean-we_std.error, ymax=we_mean+we_std.error, fill = "Reddit"), alpha=0.2) +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  ggtitle("We-usage") +
  labs(x = "Month", y = '% of Total Words') +
  plot_aes + #here's our plot aes object
  # dashed vertical line marks March 2020 (pandemic onset)
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 2) +
  # NOTE(review): geom_rect inherits data = df2 (one rect per row; alpha stacks)
  geom_rect(data = df2, #summer surge
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, #winter surge
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)

# Label the surge windows, fix the palette, print
we <- we + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=6.5,label="Summer 2020 surge", size = 5) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=6,label="Winter 2020 surge", size = 5) 
we <- we +
  scale_color_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))
we

# CEO-only monthly time series of "we" usage (means +/- 1 SE from df2).
CEO_we <- ggplot(data=df2, aes(x=Date_mean, y=we_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  # +/- 1 SE band around the monthly mean
  geom_ribbon(aes(ymin=we_mean-we_std.error, ymax=we_mean+we_std.error),fill = 'dodgerblue3', alpha=0.2) +
  ggtitle("We-usage") +
  labs(x = "Month", y = '% Total Words') +
  plot_aes + #here's our plot aes object
  # solid vertical line marks March 2020 (pandemic onset)
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), linetype = 1) +
  # NOTE(review): geom_rect inherits data = df2 (one rect per row; alpha stacks)
  geom_rect(data = df2, #summer surge
            aes(xmin = as.Date("2020-06-15", "%Y-%m-%d"), 
                xmax = as.Date("2020-07-20",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009) +
  geom_rect(data = df2, #winter surge
            aes(xmin = as.Date("2020-11-15", "%Y-%m-%d"), 
                xmax = as.Date("2021-01-01",  "%Y-%m-%d"),
                ymin = -Inf, 
                ymax = Inf),
            fill = "gray", 
            alpha = 0.009)
# Surge-window labels (smaller text than the combined figure)
CEO_we <- CEO_we + annotate(geom="text",x=as.Date("2020-07-01"),
                                y=6.5,label="Summer 2020 surge", size = 3) + 
  annotate(geom="text",x=as.Date("2020-12-03"),
           y=6.5,label="Winter 2020 surge", size = 3)

Tie them all together

# 2x2 panel of the four CEO-vs-Reddit series, sharing a single legend.
graphs <- ggpubr::ggarrange(Analytic, Cogproc, i, we,
                            ncol = 2, nrow = 2,
                            common.legend = TRUE, legend = "bottom")
annotate_figure(
  graphs,
  top = text_grob("CEOs' Language Change", color = "black",
                  face = "bold", size = 20)
)

CEO Only Graphs

# 2x2 panel of the CEO-only versions of the same four series.
graphs <- ggpubr::ggarrange(Analytic_CEO, CEO_Cogproc, CEO_i, CEO_we,
                            ncol = 2, nrow = 2,
                            common.legend = TRUE, legend = "bottom")
annotate_figure(
  graphs,
  top = text_grob("CEOs' Language Change", color = "black",
                  face = "bold", size = 20)
)

Exploratory Analyses

# Load the CEO transcript LIWC data from GitHub and build the tidy analysis
# frame (data_tidy) with a quarterly time index and a quarter factor.
df <-read_csv("https://raw.githubusercontent.com/scm1210/Language_Lab_Repro/main/Big_CEO.csv") #read in the data from github 
#put code here to read in Big CEO data
# Keep transcripts with 25 <= word count <= 5400
df <- df %>% filter(WC<=5400)   %>% 
  filter(WC>=25)

df$month_year <- format(as.Date(df$Date), "%Y-%m") ###extracting month and year to build fiscal quarter graphs, need a new variable bc if not it'll give us issues

df <- df["2010-01-01"<= df$Date & df$Date <= "2021-04-01",] #keep 2010-01-01 through 2021-04-01

data_tidy <- df %>% dplyr::select(Date, Speaker, Analytic, cogproc,allnone,we,i,emo_anx) %>%
  mutate(Date = lubridate::ymd(Date),
         time_month = as.numeric(Date - ymd("2010-03-01")) /91, # quarters elapsed since 2010-03-01 (~91 days per quarter). NOTE(review): original comment said "centering at Q1 2020", but the anchor is 2010; factor level 39 below lands near Q1 2020 and is used as the baseline in the t-tests.
         time_month_quad = time_month * time_month) #quadratic term (noted as unused downstream)

data_tidy$month_year <- format(as.Date(data_tidy$Date), "%Y-%m") ###extracting month and year to build fiscal quarter graphs, need a new variable bc if not it'll give us issues
data_tidy$Date_off <- floor(data_tidy$time_month) #whole quarters since 2010-03-01 (level 39 ~ Q1 2020)
data_tidy$Date_covid <- as.factor(data_tidy$Date_off) #factorize so quarters can be compared pairwise

Load in Reddit data

# Load weekly Reddit LIWC averages (local Dropbox path) and collapse them to
# monthly means/SEs so they can be compared against the CEO series.
reddit <- read_csv("~/Dropbox/CEO-data/LIWC-15-Data/LIWC_BLMProject_22cities_01012016-04302021_weeklyavg.csv")

reddit <- reddit %>%
  mutate(month_year = format(Week, "%Y-%m"))

### clean and tidy reddit data 

#reddit$month_year <- as.Date(reddit$month_year)

reddit$month_year <- format(as.Date(reddit$Week), "%Y-%m") # month-year label used for monthly grouping

# Apply the same word-count window used for the CEO data (25 <= WC <= 5400)
reddit <- reddit %>%
  filter(WC <= 5400) %>%
  filter(WC >= 25)

reddit_tidy <- reddit %>% # collapse weekly rows to monthly means and SEs
  group_by(month_year) %>%
  # list(...) replaces the deprecated funs(); output columns keep the
  # var_mean / var_std.error naming the figures rely on
  summarise_at(vars("Week", "WC", "Analytic", "we", "i", "insight"),
               list(mean = mean, std.error = std.error))

reddit_tidy <- reddit_tidy[reddit_tidy$month_year <= "2021-04", ] # keep months through April 2021

Write our Table Functions

baseline_ttest <- function(ttest_list) {
  # Render a list of htest objects (Welch t-tests of quarter 39 vs quarters
  # -1..44) as a styled kableExtra table.
  #
  # Args:
  #   ttest_list: list of objects returned by t.test().
  # Returns: a kableExtra-styled kable table.
  n <- length(ttest_list)  # number of comparisons supplied

  # NOTE(review): Group1/Group2 labels are hard-coded to the "39 vs seq(-1, 44)"
  # scheme used by the mapply() calls below — they are not derived from the
  # tests themselves, so keep the two in sync.
  ttest_df <- data.frame(
    Group1 = rep(39, n),
    Group2 = seq(-1, 44, 1)[1:n],
    # vapply() pins the return type (unlike sapply, which can change shape)
    t = vapply(ttest_list, function(x) {
      paste0("t(", round(x$parameter, 3), ") = ", round(x$statistic, 3))
    }, character(1)),
    p_value = vapply(ttest_list, function(x) x$p.value, numeric(1))
  )

  # Format p-values as scientific notation
  ttest_df$p_value <- format(ttest_df$p_value, scientific = TRUE)

  # Rename columns for display
  colnames(ttest_df) <- c("Group1", "Group2", "t-statistic", "p-value")

  # Create table using kableExtra
  kable(ttest_df, caption = "Summary of Welch's t-Tests", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}



baseline_cohen_d <- function(cohen_d_list) {
  # Render a list of Cohen's d results (quarter 39 vs quarters -1..44) as a
  # styled kableExtra table.
  #
  # Args:
  #   cohen_d_list: list where each element is either a bare numeric d or an
  #     effsize::cohen.d() result with an $estimate component.
  # Returns: a kableExtra-styled kable table.
  n <- length(cohen_d_list)  # number of comparisons supplied

  # Labels are hard-coded to the "39 vs seq(-1, 44)" comparison scheme;
  # column names are overwritten below, so the data.frame names are temporary.
  cohen_d_df <- data.frame(
    t = rep(39, n),
    t_plus_1 = seq(-1, 44, 1)[1:n],
    # vapply() pins the return type; accept bare numerics or cohen.d objects
    d = vapply(cohen_d_list, function(x) {
      if (is.atomic(x)) x else x$estimate
    }, numeric(1))
  )

  # Rename columns for display
  colnames(cohen_d_df) <- c("t", "t + 1", "Cohen's d")

  # Create table using kableExtra
  kable(cohen_d_df, caption = "Summary of Cohen's D", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}

baseline_mean_diff <- function(mean_diff_list) {
  # Tabulate raw mean differences (quarter 39 vs quarters -1..44) as a
  # styled kableExtra table. mean_diff_list is a numeric vector of differences.
  n_rows <- length(mean_diff_list)

  # Row labels follow the hard-coded "39 vs seq(-1, 44)" comparison scheme
  summary_tbl <- data.frame(
    Group1 = rep(39, n_rows),
    Group2 = seq(-1, 44, 1)[1:n_rows],
    `Mean Difference` = mean_diff_list
  )

  # Display names
  colnames(summary_tbl) <- c("t", "t+1", "Mean Difference")

  # Render with kableExtra
  kable(summary_tbl, caption = "Summary of Mean Differences", booktabs = TRUE) %>%
    kableExtra::kable_styling()
}

Write our Stats Functions

Analytic Thinking

# Pairwise comparisons of Analytic thinking between two quarter bins
# (levels of data_tidy$Date_covid).

analytic_my.t <- function(fac1, fac2) {
  # Welch two-sample t-test: quarter fac1 vs quarter fac2
  with(data_tidy,
       t.test(Analytic[Date_covid == fac1], Analytic[Date_covid == fac2]))
}

analytic_my.d <- function(fac1, fac2) {
  # Cohen's d effect size for the same contrast
  with(data_tidy,
       cohen.d(Analytic[Date_covid == fac1], Analytic[Date_covid == fac2]))
}

analytic_mean <- function(fac1, fac2) {
  # Raw difference in mean Analytic score (fac1 minus fac2)
  with(data_tidy,
       mean(Analytic[Date_covid == fac1]) - mean(Analytic[Date_covid == fac2]))
}

Cognitive Processing

# Pairwise comparisons of cognitive processing (cogproc) between two
# quarter bins (levels of data_tidy$Date_covid).

cogproc_my.t <- function(fac1, fac2) {
  # Welch two-sample t-test: quarter fac1 vs quarter fac2
  with(data_tidy,
       t.test(cogproc[Date_covid == fac1], cogproc[Date_covid == fac2]))
}

cogproc_my.d <- function(fac1, fac2) {
  # Cohen's d effect size for the same contrast
  with(data_tidy,
       cohen.d(cogproc[Date_covid == fac1], cogproc[Date_covid == fac2]))
}

cogproc_mean <- function(fac1, fac2) {
  # Raw difference in mean cogproc score (fac1 minus fac2)
  with(data_tidy,
       mean(cogproc[Date_covid == fac1]) - mean(cogproc[Date_covid == fac2]))
}

I-words

# Pairwise comparisons of I-word usage between two quarter bins
# (levels of data_tidy$Date_covid).

i_my.t <- function(fac1, fac2) {
  # Welch two-sample t-test: quarter fac1 vs quarter fac2
  with(data_tidy,
       t.test(i[Date_covid == fac1], i[Date_covid == fac2]))
}

i_my.d <- function(fac1, fac2) {
  # Cohen's d effect size for the same contrast
  with(data_tidy,
       cohen.d(i[Date_covid == fac1], i[Date_covid == fac2]))
}

i_mean <- function(fac1, fac2) {
  # Raw difference in mean I-word usage (fac1 minus fac2)
  with(data_tidy,
       mean(i[Date_covid == fac1]) - mean(i[Date_covid == fac2]))
}

We-words

# Pairwise comparisons of we-word usage between two quarter bins
# (levels of data_tidy$Date_covid).

we_my.t <- function(fac1, fac2) {
  # Welch two-sample t-test: quarter fac1 vs quarter fac2
  with(data_tidy,
       t.test(we[Date_covid == fac1], we[Date_covid == fac2]))
}

we_my.d <- function(fac1, fac2) {
  # Cohen's d effect size for the same contrast
  with(data_tidy,
       cohen.d(we[Date_covid == fac1], we[Date_covid == fac2]))
}

we_mean <- function(fac1, fac2) {
  # Raw difference in mean we-word usage (fac1 minus fac2)
  with(data_tidy,
       mean(we[Date_covid == fac1]) - mean(we[Date_covid == fac2]))
}

Analyses

Analytic Thinking

T-test

# Welch t-tests: quarter 39 (~Q1 2020 baseline) vs every quarter -1 through 44.
# BUGFIX: was seq(1, 44, 1). Every other measure (cogproc, i, we) compares 39
# against seq(-1, 44, 1), and baseline_ttest() hard-codes its Group2 labels as
# seq(-1, 44, 1) — with seq(1, 44, 1) the rows were mislabeled by two quarters.
analytic_ttest_baseline <- mapply(analytic_my.t, 39, seq(-1, 44, 1), SIMPLIFY = FALSE)
baseline_ttest(analytic_ttest_baseline)
Summary of Welch’s t-Tests
Group1 Group2 t-statistic p-value
39 -1 t(1398.238) = -3.958 7.935887e-05
39 0 t(1316.759) = -2.829 4.743809e-03
39 1 t(1104.194) = -4.448 9.563828e-06
39 2 t(1326.762) = -3.111 1.903600e-03
39 3 t(1700.928) = -1.482 1.384678e-01
39 4 t(1728.557) = -0.774 4.388235e-01
39 5 t(1638.866) = -2.607 9.224253e-03
39 6 t(1889.515) = -1.85 6.444644e-02
39 7 t(2364.197) = -2.444 1.459342e-02
39 8 t(2496.556) = -0.954 3.399879e-01
39 9 t(2555.646) = -2.34 1.935830e-02
39 10 t(3156.363) = -1.073 2.835575e-01
39 11 t(3065.339) = -1.384 1.665191e-01
39 12 t(3004.136) = -0.423 6.724037e-01
39 13 t(2715.245) = -2.739 6.193577e-03
39 14 t(3089.695) = -2.773 5.592719e-03
39 15 t(3047.279) = -2.26 2.391210e-02
39 16 t(3074.353) = -1.681 9.278904e-02
39 17 t(2762.79) = -3.086 2.047006e-03
39 18 t(3018.943) = -2.858 4.298007e-03
39 19 t(3050.245) = -1.99 4.672003e-02
39 20 t(3023.232) = -0.22 8.260843e-01
39 21 t(2747.907) = -0.775 4.381557e-01
39 22 t(2981.893) = -1.599 1.099204e-01
39 23 t(3090.796) = -1.463 1.434427e-01
39 24 t(3047.223) = -1.777 7.564602e-02
39 25 t(2754.568) = -1.784 7.460686e-02
39 26 t(2935.258) = -2.214 2.690626e-02
39 27 t(3100.911) = -1.71 8.734543e-02
39 28 t(3084.53) = -0.391 6.956618e-01
39 29 t(2764.156) = -1.613 1.067840e-01
39 30 t(2860.302) = -0.985 3.245110e-01
39 31 t(3026.977) = -1.256 2.092528e-01
39 32 t(3071.939) = -0.182 8.559538e-01
39 33 t(2771.37) = -1.342 1.797001e-01
39 34 t(2808.116) = -1.902 5.732281e-02
39 35 t(3027.247) = -1.259 2.082164e-01
39 36 t(3010.301) = -0.285 7.753956e-01
39 37 t(2760) = 0 1.000000e+00
39 38 t(2689.448) = 3.751 1.795358e-04
39 39 t(2962.494) = 2.376 1.754338e-02
39 40 t(3041.46) = 2.343 1.919049e-02
39 41 t(2599.104) = 1.854 6.387322e-02
39 42 t(2701.291) = 0.504 6.141739e-01

Cohen’s D

# Cohen's d: quarter 39 baseline vs quarters -1 through 44.
# BUGFIX: was seq(1, 44, 1); aligned to seq(-1, 44, 1) to match the other
# measures and the hard-coded row labels in baseline_cohen_d().
analytic_D_baseline <- mapply(analytic_my.d, 39, seq(-1, 44, 1), SIMPLIFY = FALSE)
baseline_cohen_d(analytic_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
39 -1 -0.1851029
39 0 -0.1339318
39 1 -0.2227250
39 2 -0.1475299
39 3 -0.0657255
39 4 -0.0341759
39 5 -0.1166355
39 6 -0.0796613
39 7 -0.0983884
39 8 -0.0377881
39 9 -0.0918577
39 10 -0.0364593
39 11 -0.0492566
39 12 -0.0151757
39 13 -0.1048534
39 14 -0.0956737
39 15 -0.0797918
39 16 -0.0588432
39 17 -0.1173865
39 18 -0.0943563
39 19 -0.0693780
39 20 -0.0076498
39 21 -0.0295652
39 22 -0.0517852
39 23 -0.0503695
39 24 -0.0612642
39 25 -0.0679003
39 26 -0.0708322
39 27 -0.0580598
39 28 -0.0132602
39 29 -0.0613006
39 30 -0.0310062
39 31 -0.0424310
39 32 -0.0060546
39 33 -0.0509440
39 34 -0.0587861
39 35 -0.0420544
39 36 -0.0095347
39 37 0.0000000
39 38 0.1181953
39 39 0.0790406
39 40 0.0778936
39 41 0.0723611
39 42 0.0193511

Mean Differences

# Mean differences: quarter 39 baseline vs quarters -1 through 44.
# BUGFIX: was seq(1, 44, 1); aligned to seq(-1, 44, 1) to match the other
# measures and the hard-coded row labels in baseline_mean_diff().
analytic_mean_baseline <- mapply(analytic_mean, 39, seq(-1, 44, 1))
baseline_mean_diff(analytic_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
39 -1 -2.5595130
39 0 -1.8237112
39 1 -3.1031328
39 2 -2.0355221
39 3 -0.9202096
39 4 -0.4735812
39 5 -1.5973706
39 6 -1.0953330
39 7 -1.3665653
39 8 -0.5164610
39 9 -1.2581615
39 10 -0.5232129
39 11 -0.6991442
39 12 -0.2083640
39 13 -1.4530301
39 14 -1.3405792
39 15 -1.1050628
39 16 -0.8215474
39 17 -1.5740651
39 18 -1.3251540
39 19 -0.9586343
39 20 -0.1047645
39 21 -0.4018354
39 22 -0.7327220
39 23 -0.7061372
39 24 -0.8469288
39 25 -0.9293251
39 26 -1.0042991
39 27 -0.8187721
39 28 -0.1862589
39 29 -0.8430907
39 30 -0.4408389
39 31 -0.5884785
39 32 -0.0854432
39 33 -0.6972308
39 34 -0.8428210
39 35 -0.5871033
39 36 -0.1326276
39 37 0.0000000
39 38 1.6383487
39 39 1.0925364
39 40 1.0940622
39 41 0.9841566
39 42 0.2666881

Cogproc

T-test

# Welch t-tests: quarter 39 (~Q1 2020 baseline) vs quarters -1 through 44.
# Map(f, ...) is equivalent to mapply(f, ..., SIMPLIFY = FALSE).
cogproc_ttest_baseline <- Map(cogproc_my.t, 39, seq(-1, 44, 1))
baseline_ttest(cogproc_ttest_baseline)
Summary of Welch’s t-Tests
Group1 Group2 t-statistic p-value
39 -1 t(1596.222) = -2.415 1.584343e-02
39 0 t(1896.147) = -3.384 7.292514e-04
39 1 t(1569.956) = -3.665 2.557183e-04
39 2 t(1448.795) = -2.554 1.074468e-02
39 3 t(1268.938) = -1.709 8.770750e-02
39 4 t(1432.206) = -3.144 1.702590e-03
39 5 t(1903.641) = -4.841 1.392629e-06
39 6 t(1856.011) = -4.492 7.501149e-06
39 7 t(1671.938) = -2.555 1.070559e-02
39 8 t(2041.807) = -2.55 1.083707e-02
39 9 t(2448.976) = -3.049 2.321103e-03
39 10 t(2515.99) = -3.439 5.924926e-04
39 11 t(2590.933) = -2.754 5.920130e-03
39 12 t(2917.857) = -3.834 1.285412e-04
39 13 t(3005.391) = -4.139 3.576382e-05
39 14 t(2878.554) = -3.362 7.848321e-04
39 15 t(2729.533) = -1.394 1.634747e-01
39 16 t(2937.206) = -2.929 3.426514e-03
39 17 t(2930.779) = -2.068 3.871690e-02
39 18 t(2974.592) = -1.231 2.183927e-01
39 19 t(2750.003) = -1.093 2.745876e-01
39 20 t(2879.244) = -2.64 8.338538e-03
39 21 t(2826.016) = -1.209 2.268931e-01
39 22 t(2872.258) = -1.939 5.256728e-02
39 23 t(2733.175) = -1.48 1.390976e-01
39 24 t(2747.232) = -2.891 3.865562e-03
39 25 t(2933.786) = -1.07 2.846932e-01
39 26 t(2953.155) = -0.8 4.238448e-01
39 27 t(2746.072) = -2.129 3.331096e-02
39 28 t(2750.139) = -1.545 1.223460e-01
39 29 t(2880.016) = -0.7 4.841564e-01
39 30 t(2979.921) = -1.257 2.087883e-01
39 31 t(2757.621) = -0.795 4.265726e-01
39 32 t(2568.527) = -1.64 1.010420e-01
39 33 t(2892.73) = -0.589 5.559665e-01
39 34 t(2905.335) = -1.252 2.107755e-01
39 35 t(2759.555) = 0.285 7.756173e-01
39 36 t(2574.395) = -0.612 5.403712e-01
39 37 t(2847.493) = -0.568 5.702390e-01
39 38 t(2921.523) = -0.641 5.215064e-01
39 39 t(2760) = 0 1.000000e+00
39 40 t(2455.193) = -6.888 7.144573e-12
39 41 t(2772.368) = -6.631 3.987064e-11
39 42 t(2795.825) = -4.044 5.404755e-05
39 43 t(2633.899) = -0.338 7.354177e-01
39 44 t(2717.576) = -1.687 9.172727e-02

Cohen’s D

# Cohen's d: quarter 39 baseline vs quarters -1 through 44
# (Map == mapply with SIMPLIFY = FALSE).
cogproc_D_baseline <- Map(cogproc_my.d, 39, seq(-1, 44, 1))
baseline_cohen_d(cogproc_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
39 -1 -0.1082032
39 0 -0.1452980
39 1 -0.1644938
39 2 -0.1166419
39 3 -0.0807181
39 4 -0.1449077
39 5 -0.2075203
39 6 -0.1940441
39 7 -0.1136000
39 8 -0.1073236
39 9 -0.1217299
39 10 -0.1359451
39 11 -0.1077753
39 12 -0.1335815
39 13 -0.1483548
39 14 -0.1219518
39 15 -0.0533229
39 16 -0.1026109
39 17 -0.0738308
39 18 -0.0435087
39 19 -0.0415667
39 20 -0.0885809
39 21 -0.0430370
39 22 -0.0684869
39 23 -0.0563917
39 24 -0.0964562
39 25 -0.0374106
39 26 -0.0278350
39 27 -0.0810604
39 28 -0.0506828
39 29 -0.0243024
39 30 -0.0430806
39 31 -0.0302226
39 32 -0.0539653
39 33 -0.0201837
39 34 -0.0425144
39 35 0.0108260
39 36 -0.0196669
39 37 -0.0193508
39 38 -0.0216342
39 39 0.0000000
39 40 -0.2262115
39 41 -0.2255093
39 42 -0.1381970
39 43 -0.0131311
39 44 -0.0646561

Mean Differences

# Mean differences: quarter 39 (~Q1 2020 baseline) vs quarters -1 through 44.
cogproc_mean_baseline <- mapply(cogproc_mean, 39, seq(-1, 44, 1))
# BUGFIX: was baseline_mean_diff(cogproc_meandiff) — `cogproc_meandiff` is
# never defined anywhere in the file; the vector computed above is
# cogproc_mean_baseline (this also explains the truncated rendered table).
baseline_mean_diff(cogproc_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
39 -1 -0.6107287
39 0 0.1784774
39 1 0.6094504
39 2 -0.6540232
39 3 0.1559844
39 4 0.7442075
39 5 -0.2962170
39 6 -0.2746360
39 7 0.5304979
39 8 -0.5357971
39 9 0.2775877
39 10 -0.0886600

I-words

T-test

# Welch t-tests: quarter 39 (~Q1 2020 baseline) vs quarters -1 through 44
# (Map == mapply with SIMPLIFY = FALSE).
i_ttest_baseline <- Map(i_my.t, 39, seq(-1, 44, 1))
baseline_ttest(i_ttest_baseline)
Summary of Welch’s t-Tests
Group1 Group2 t-statistic p-value
39 -1 t(1552.097) = -0.547 5.847291e-01
39 0 t(1891.692) = 0.927 3.540054e-01
39 1 t(1543.548) = -1.288 1.980085e-01
39 2 t(1384.86) = -0.109 9.135023e-01
39 3 t(1243.226) = -1.623 1.048443e-01
39 4 t(1406.99) = -0.995 3.199685e-01
39 5 t(1807.256) = -2.507 1.226620e-02
39 6 t(1839.806) = -3.324 9.032501e-04
39 7 t(1676.736) = -2.53 1.148118e-02
39 8 t(1968.571) = -2.1 3.587236e-02
39 9 t(2437.139) = -1.237 2.163517e-01
39 10 t(2522.069) = -1.346 1.784795e-01
39 11 t(2584.073) = -1.162 2.454416e-01
39 12 t(2968.434) = -1.419 1.558820e-01
39 13 t(3026.77) = -2.397 1.657169e-02
39 14 t(2993.59) = -2.067 3.878505e-02
39 15 t(2732) = -0.717 4.734852e-01
39 16 t(2965.139) = -0.102 9.187266e-01
39 17 t(3003.951) = 0.591 5.547413e-01
39 18 t(2941.793) = -0.298 7.653434e-01
39 19 t(2746.3) = 1.066 2.866121e-01
39 20 t(3052.212) = 1.024 3.060977e-01
39 21 t(2945.021) = 1.199 2.304594e-01
39 22 t(2914.983) = 0.722 4.704495e-01
39 23 t(2749.491) = -0.7 4.839122e-01
39 24 t(2859.859) = 1.32 1.870804e-01
39 25 t(2961.854) = 1.542 1.232059e-01
39 26 t(2959.922) = 0.898 3.692053e-01
39 27 t(2754.67) = -1.204 2.286463e-01
39 28 t(2796.851) = 1.861 6.285424e-02
39 29 t(2950.089) = 1.04 2.984570e-01
39 30 t(2957.818) = 0.508 6.116638e-01
39 31 t(2761.248) = -2.113 3.466039e-02
39 32 t(2623.154) = 1.589 1.122499e-01
39 33 t(2902.226) = 1.451 1.468228e-01
39 34 t(2837.131) = 1.36 1.740380e-01
39 35 t(2772.998) = 0.273 7.845833e-01
39 36 t(2685.979) = 1.471 1.413725e-01
39 37 t(2847.315) = 1.777 7.574039e-02
39 38 t(2842.537) = 0.368 7.130643e-01
39 39 t(2760) = 0 1.000000e+00
39 40 t(2443.982) = -0.094 9.249572e-01
39 41 t(2883.506) = -2.165 3.046355e-02
39 42 t(2869.361) = -1.774 7.624031e-02
39 43 t(2624.604) = -2.188 2.878893e-02
39 44 t(2714.734) = 0.937 3.486622e-01

Cohen’s D

# Cohen's d: quarter 39 baseline vs quarters -1 through 44
# (Map == mapply with SIMPLIFY = FALSE).
i_D_baseline <- Map(i_my.d, 39, seq(-1, 44, 1))
baseline_cohen_d(i_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
39 -1 -0.0247279
39 0 0.0398365
39 1 -0.0581512
39 2 -0.0050462
39 3 -0.0772991
39 4 -0.0461594
39 5 -0.1091999
39 6 -0.1440105
39 7 -0.1124059
39 8 -0.0893624
39 9 -0.0494378
39 10 -0.0531632
39 11 -0.0454914
39 12 -0.0492032
39 13 -0.0857292
39 14 -0.0742666
39 15 -0.0274178
39 16 -0.0035654
39 17 0.0209493
39 18 -0.0105813
39 19 0.0405406
39 20 0.0336718
39 21 0.0422507
39 22 0.0253916
39 23 -0.0266926
39 24 0.0433859
39 25 0.0537619
39 26 0.0312322
39 27 -0.0458412
39 28 0.0606377
39 29 0.0358631
39 30 0.0174393
39 31 -0.0802960
39 32 0.0517999
39 33 0.0496875
39 34 0.0465307
39 35 0.0103788
39 36 0.0463627
39 37 0.0605520
39 38 0.0125214
39 39 0.0000000
39 40 -0.0031002
39 41 -0.0726695
39 42 -0.0601078
39 43 -0.0851660
39 44 0.0359521

Mean Differences

# Mean differences: quarter 39 baseline vs quarters -1 through 44.
# BUGFIX: was seq(1, 44, 1); the i-word t-tests and Cohen's d above use
# seq(-1, 44, 1), as do the hard-coded -1..44 row labels in the table
# functions, so the mean differences are aligned to the same comparisons.
i_mean_baseline <- mapply(i_mean, 39, seq(-1, 44, 1))
baseline_mean_diff(i_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
39 -1 -0.0501342
39 0 -0.0043782
39 1 -0.0669225
39 2 -0.0403834
39 3 -0.0961358
39 4 -0.1254502
39 5 -0.0988873
39 6 -0.0779165
39 7 -0.0430919
39 8 -0.0464607
39 9 -0.0396123
39 10 -0.0431445
39 11 -0.0772021
39 12 -0.0658681
39 13 -0.0237140
39 14 -0.0031143
39 15 0.0185136
39 16 -0.0091472
39 17 0.0341431
39 18 0.0309884
39 19 0.0366076
39 20 0.0218302
39 21 -0.0233920
39 22 0.0385859
39 23 0.0469517
39 24 0.0272901
39 25 -0.0407296
39 26 0.0537960
39 27 0.0313834
39 28 0.0153213
39 29 -0.0721711
39 30 0.0447642
39 31 0.0432108
39 32 0.0399635
39 33 0.0091137
39 34 0.0417002
39 35 0.0522583
39 36 0.0108053
39 37 0.0000000
39 38 -0.0025900
39 39 -0.0638521
39 40 -0.0523031
39 41 -0.0732920
39 42 0.0314656

We-words

T-test

# Welch t-tests: quarter 39 (~Q1 2020 baseline) vs quarters -1 through 44
# (Map == mapply with SIMPLIFY = FALSE).
we_ttest_baseline <- Map(we_my.t, 39, seq(-1, 44, 1))
baseline_ttest(we_ttest_baseline)
Summary of Welch’s t-Tests
Group1 Group2 t-statistic p-value
39 -1 t(1465.793) = 4.671 3.279146e-06
39 0 t(1848.152) = 6.423 1.697612e-10
39 1 t(1470.592) = 6.634 4.571561e-11
39 2 t(1340.255) = 4.894 1.108406e-06
39 3 t(1186.083) = 4.105 4.315229e-05
39 4 t(1346.481) = 4.108 4.228869e-05
39 5 t(1834.74) = 4.329 1.576337e-05
39 6 t(1781.674) = 5.422 6.681106e-08
39 7 t(1634.64) = 4.157 3.391822e-05
39 8 t(1907.992) = 5.269 1.524353e-07
39 9 t(2378.729) = 4.998 6.210940e-07
39 10 t(2500.799) = 4.77 1.952074e-06
39 11 t(2535.466) = 4.063 4.986150e-05
39 12 t(3089.99) = 4.718 2.484294e-06
39 13 t(3056.043) = 5.46 5.155028e-08
39 14 t(3020.142) = 3.676 2.413218e-04
39 15 t(2708.481) = 3.988 6.850857e-05
39 16 t(3036.488) = 5.82 6.499374e-09
39 17 t(3068.469) = 4.267 2.040157e-05
39 18 t(3121.075) = 3.099 1.956938e-03
39 19 t(2762.422) = 4.309 1.693639e-05
39 20 t(3050.317) = 5.009 5.775021e-07
39 21 t(3014.948) = 4.539 5.866656e-06
39 22 t(3073.922) = 3.843 1.242059e-04
39 23 t(2746.502) = 3.824 1.344061e-04
39 24 t(2875.797) = 4.586 4.708222e-06
39 25 t(3081.404) = 3.337 8.573867e-04
39 26 t(3087.905) = 3.526 4.279508e-04
39 27 t(2757.36) = 4.105 4.154902e-05
39 28 t(2979.319) = 4.253 2.174582e-05
39 29 t(3056.368) = 4.106 4.135301e-05
39 30 t(3039.528) = 3.015 2.590510e-03
39 31 t(2767.115) = 3.101 1.949471e-03
39 32 t(2771.482) = 3.391 7.054067e-04
39 33 t(2979.963) = 3.003 2.698641e-03
39 34 t(3046.109) = 1.892 5.860198e-02
39 35 t(2770.676) = 0.404 6.863047e-01
39 36 t(2773.232) = 3.487 4.962671e-04
39 37 t(2990.159) = 2.309 2.101141e-02
39 38 t(2986.735) = 1.665 9.596844e-02
39 39 t(2760) = 0 1.000000e+00
39 40 t(2634.135) = 6.693 2.665399e-11
39 41 t(2865.847) = 9.071 2.130887e-19
39 42 t(2866.786) = 7.127 1.291552e-12
39 43 t(2619.501) = 4.263 2.089149e-05
39 44 t(2715.201) = 4.971 7.070835e-07

Cohen’s D

# Cohen's d: quarter 39 (~Q1 2020 baseline) vs quarters -1 through 44.
# BUGFIX: the first argument was 0, so the d's were computed against quarter 0
# instead of the quarter-39 baseline used by every other measure — the rendered
# table confirms this (d = 0.0000000 at the "0" row rather than the "39" row).
we_D_baseline <- mapply(we_my.d, 39, seq(-1, 44, 1), SIMPLIFY = FALSE)
baseline_cohen_d(we_D_baseline)
Summary of Cohen’s D
t t + 1 Cohen’s d
39 -1 -0.0557103
39 0 0.0000000
39 1 0.0282300
39 2 -0.0463795
39 3 -0.0765264
39 4 -0.0785886
39 5 -0.0903266
39 6 -0.0369619
39 7 -0.0861466
39 8 -0.0471293
39 9 -0.0696648
39 10 -0.0852994
39 11 -0.1091741
39 12 -0.1044371
39 13 -0.0699960
39 14 -0.1377183
39 15 -0.1142513
39 16 -0.0696756
39 17 -0.1164580
39 18 -0.1531635
39 19 -0.1116030
39 20 -0.0985302
39 21 -0.1149236
39 22 -0.1345665
39 23 -0.1278698
39 24 -0.1211008
39 25 -0.1517493
39 26 -0.1449650
39 27 -0.1242977
39 28 -0.1244390
39 29 -0.1282285
39 30 -0.1664816
39 31 -0.1564665
39 32 -0.1608429
39 33 -0.1709072
39 34 -0.2024843
39 35 -0.2561414
39 36 -0.1541242
39 37 -0.1924580
39 38 -0.2139859
39 39 -0.2779748
39 40 -0.0613540
39 41 0.0272928
39 42 -0.0377803
39 43 -0.1145791
39 44 -0.0872167

Mean Differences

# Mean differences: quarter 39 (~Q1 2020 baseline) vs quarters -1 through 44.
we_mean_baseline <- mapply(we_mean, 39, seq(-1, 44, 1))
baseline_mean_diff(we_mean_baseline)
Summary of Mean Differences
t t+1 Mean Difference
39 -1 0.3137668
39 0 0.3955121
39 1 0.4359992
39 2 0.3290490
39 3 0.2855660
39 4 0.2808382
39 5 0.2669796
39 6 0.3419809
39 7 0.2698555
39 8 0.3270471
39 9 0.2931861
39 10 0.2721081
39 11 0.2342147
39 12 0.2395374
39 13 0.2901721
39 14 0.1927559
39 15 0.2253730
39 16 0.2937306
39 17 0.2221707
39 18 0.1625054
39 19 0.2350808
39 20 0.2468167
39 21 0.2291996
39 22 0.1960384
39 23 0.2103506
39 24 0.2188209
39 25 0.1698311
39 26 0.1792838
39 27 0.2204439
39 28 0.2051901
39 29 0.2059084
39 30 0.1504079
39 31 0.1695979
39 32 0.1585671
39 33 0.1475049
39 34 0.0937065
39 35 0.0222390
39 36 0.1626729
39 37 0.1131470
39 38 0.0815058
39 39 0.0000000
39 40 0.3068743
39 41 0.4343882
39 42 0.3420502
39 43 0.2341940
39 44 0.2713253

Figures

# Collapse both corpora to quarterly means/SEs for the decade-long figures.
df2 <- df %>%
  mutate(Date = as.yearqtr(Date)) %>% # convert dates to fiscal quarters
  group_by(Top100, Date) %>% # group by the Top100 tag and quarter
  # list(...) replaces the deprecated funs(); output names stay
  # var_mean / var_std.error, which the plots reference
  summarise_at(vars("WC", "Analytic", "Clout", "Drives", "cogproc",
                    "focuspast", "focuspresent", "focusfuture", "power",
                    "allure", "we", "i", "insight", "emo_anx", "allnone"),
               list(mean = mean, std.error = std.error))
# NOTE(review): character-vs-yearqtr comparison relies on S3 Ops coercion —
# behavior kept as-is; confirm it filters the intended window.
df2 <- df2["2010 Q1"<= df2$Date & df2$Date <= "2021 Q1",]
df2 <- df2[!duplicated(df2[c('Date')]),] # drop duplicate quarter rows created by the collapse


reddit_tidy <- reddit %>%
  mutate(Date = as.yearqtr(month_year)) %>% # quarterly date (dropped by summarise_at below)
  group_by(month_year) %>%
  summarise_at(vars("Week", "WC", "Analytic", "Clout", "cogproc",
                    "focuspast", "focuspresent", "focusfuture", "power",
                    "we", "i", "insight"),
               list(mean = mean, std.error = std.error))

reddit_tidy <- reddit_tidy %>%
  mutate(Quarter = as.yearqtr(Week_mean)) # quarter label from the mean week
reddit_tidy <- reddit_tidy[!duplicated(reddit_tidy[c('Quarter')]),] # one row per quarter

Analytic Thinking

# Decade-long CEO vs Reddit quarterly series of Analytic thinking.
# df2 uses the yearqtr Date column; reddit_tidy uses its Quarter column.
analytic <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date, y = Analytic_mean, color = "CEO"), #set our colors 
              linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date,ymin=Analytic_mean-Analytic_std.error, ymax=Analytic_mean+Analytic_std.error, fill = "CEO"), alpha=0.2) +
geom_line(data = reddit_tidy, aes(x = Quarter, y = Analytic_mean, color = "Reddit"), #set our colors 
              linetype = 'solid') +
geom_ribbon(data = reddit_tidy, aes(x = Quarter,ymin=Analytic_mean-Analytic_std.error, ymax=Analytic_mean+Analytic_std.error, fill = "Reddit"), alpha=0.2) +
  ggtitle("Analytic Thinking") + 
  labs(x = "Month", y = 'Standardized score') +
  plot_aes + #here's our plot aes object
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + #change the N to change the number of tick marks
  # vertical line at Q1 2020 (pandemic onset)
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = T) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))


# Effect-size annotations rendered via plotmath (parse = TRUE);
# x positions are numeric yearqtr values (2011.5 = mid-2011, 2015.5 = mid-2015)
analytic <- analytic + annotate("text", x = 2011.5 , y = 36,
         label = "paste(italic(d[2010]), \" = -0.0817\")", parse = TRUE,size = 5) + 
  annotate("text", x = 2015.5 , y = 36,label = "paste(italic(d[2015]), \" = -0.050\")", parse = TRUE,size = 5)

# Lock the CEO/Reddit palette, then print
analytic <- analytic +
  scale_color_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))

analytic

# CEO-only decade-long quarterly series of Analytic thinking (df2 means/SEs).
CEO_Analytic_decade <- ggplot(data=df2, aes(x=Date, y=Analytic_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  #geom_point(size=.5) +
  #scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  ggtitle("Analytic Thinking") + 
  # +/- 1 SE band around the quarterly mean
  geom_ribbon(aes(ymin=Analytic_mean-Analytic_std.error, ymax=Analytic_mean+Analytic_std.error),fill = 'dodgerblue3', alpha=0.2) +
  labs(x = "Month", y = 'Standardized score') +
  plot_aes + #here's our plot aes object
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + #change the N to change the number of tick marks
  # vertical line at Q1 2020 (pandemic onset)
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = T) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))

# Effect-size annotations rendered via plotmath (parse = TRUE)
CEO_Analytic_decade <- CEO_Analytic_decade + annotate("text", x = 2011.5 , y = 36,
         label = "paste(italic(d[2010]), \" = -0.0817\")", parse = TRUE,size = 5) + 
  annotate("text", x = 2015.5 , y = 36,label = "paste(italic(d[2015]), \" = -0.050\")", parse = TRUE,size = 5)

Cognitive Processing

# Cognitive Processing: CEO vs. Reddit quarterly means with +/- 1 SE ribbons
# and a vertical reference line at 2020 Q1 (pandemic onset).
# Fixes: show.legend = TRUE (not the reassignable shorthand T); Reddit layers
# indented consistently with the rest of the chain.
cogproc_decade <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date, y = cogproc_mean, color = "CEO"),
            linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date,ymin=cogproc_mean-cogproc_std.error, ymax=cogproc_mean+cogproc_std.error, fill = "CEO"), alpha=0.2) +
  geom_line(data = reddit_tidy, aes(x = Quarter, y = cogproc_mean, color = "Reddit"),
            linetype = 'solid') +
  geom_ribbon(data = reddit_tidy, aes(x = Quarter,ymin=cogproc_mean-cogproc_std.error, ymax=cogproc_mean+cogproc_std.error, fill = "Reddit"), alpha=0.2) +
  ggtitle("Cognitive Processing") + 
  labs(x = "Fiscal Quarter", y = '% of Total Words') +
  plot_aes + # shared theme object defined at the top of the file
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + # n controls the number of tick marks
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = TRUE) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))


# Attach effect-size labels (Cohen's d vs. 2010 and 2015) to the
# Cognitive Processing panel; labels are parsed as plotmath.
cogproc <- cogproc_decade +
  annotate("text", x = 2011.5, y = 11.2, size = 5, parse = TRUE,
           label = 'paste(italic(d[2010]), " = -0.082")') +
  annotate("text", x = 2015.5, y = 11.2, size = 5, parse = TRUE,
           label = 'paste(italic(d[2015]), " = -0.043")')

# Pin the CEO/Reddit colour and fill mapping for consistency across
# figures, then print the plot.
cogproc <- cogproc +
  scale_colour_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))

cogproc

# CEO-only Cognitive Processing trend: quarterly mean with a +/- 1 SE ribbon
# and a vertical reference line at 2020 Q1 (pandemic onset).
# Fixes: removed the literally duplicated theme(axis.text.x=...) line;
# show.legend uses TRUE rather than the reassignable shorthand T.
ceo_cogproc_decade <- ggplot(data=df2, aes(x=Date, y=cogproc_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  ggtitle("Cognitive Processing") + 
  plot_aes + # shared theme object defined at the top of the file
  # +/- 1 standard-error band around the quarterly mean
  geom_ribbon(aes(ymin=cogproc_mean-cogproc_std.error, ymax=cogproc_mean+cogproc_std.error),fill = 'dodgerblue3', alpha=0.2) +
  labs(x = "Month", y = '% of total words') +
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + # n controls the number of tick marks
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = TRUE) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))


# Add the decade effect-size labels (Cohen's d vs. 2010 and 2015), parsed
# as plotmath, to the CEO-only Cognitive Processing panel.
ceo_cogproc_decade <- ceo_cogproc_decade +
  annotate("text", x = 2011.5, y = 11.2, size = 5, parse = TRUE,
           label = 'paste(italic(d[2010]), " = -0.082")') +
  annotate("text", x = 2015.5, y = 11.2, size = 5, parse = TRUE,
           label = 'paste(italic(d[2015]), " = -0.043")')

I-usage

# I-usage (first-person singular pronouns): CEO vs. Reddit quarterly means
# with +/- 1 SE ribbons and a reference line at 2020 Q1 (pandemic onset).
# Fixes: show.legend = TRUE (not the reassignable shorthand T); Reddit layers
# indented consistently with the rest of the chain.
i_decade <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date, y = i_mean, color = "CEO"),
            linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date,ymin=i_mean-i_std.error, ymax=i_mean+i_std.error, fill = "CEO"), alpha=0.2) +
  geom_line(data = reddit_tidy, aes(x = Quarter, y = i_mean, color = "Reddit"),
            linetype = 'solid') +
  geom_ribbon(data = reddit_tidy, aes(x = Quarter,ymin=i_mean-i_std.error, ymax=i_mean+i_std.error, fill = "Reddit"), alpha=0.2) +
  ggtitle("I-usage") + 
  labs(x = "Fiscal Quarter", y = '% of Total Words') +
  plot_aes + # shared theme object defined at the top of the file
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + # n controls the number of tick marks
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = TRUE) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))

# Attach effect-size labels (Cohen's d vs. 2010 and 2015) to the
# I-usage panel; labels are parsed as plotmath.
i <- i_decade +
  annotate("text", x = 2011.5, y = 1.6, size = 5, parse = TRUE,
           label = 'paste(italic(d[2010]), " = -0.012")') +
  annotate("text", x = 2015.5, y = 1.6, size = 5, parse = TRUE,
           label = 'paste(italic(d[2015]), " = 0.024")')

# Pin the CEO/Reddit colour and fill mapping for consistency across
# figures, then print the plot.
i <- i +
  scale_colour_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))

i

# CEO-only Self-focus (I-usage) trend: quarterly mean with a +/- 1 SE ribbon
# and a vertical reference line at 2020 Q1 (pandemic onset).
# Fix: show.legend uses TRUE rather than the reassignable shorthand T.
ceo_i_decade <- ggplot(data=df2, aes(x=Date, y=i_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  ggtitle("Self-focus") + 
  plot_aes + # shared theme object defined at the top of the file
  # +/- 1 standard-error band around the quarterly mean
  geom_ribbon(aes(ymin=i_mean-i_std.error, ymax=i_mean+i_std.error),fill = 'dodgerblue3', alpha=0.2) +
  labs(x = "Month", y = '% of total words') +
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + # n controls the number of tick marks
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = TRUE) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))

# Add the decade effect-size labels (Cohen's d vs. 2010 and 2015), parsed
# as plotmath, to the CEO-only Self-focus panel.
ceo_i_decade <- ceo_i_decade +
  annotate("text", x = 2011.5, y = 1.6, size = 5, parse = TRUE,
           label = 'paste(italic(d[2010]), " = -0.012")') +
  annotate("text", x = 2015.5, y = 1.6, size = 5, parse = TRUE,
           label = 'paste(italic(d[2015]), " = 0.024")')

We-usage

# We-usage (first-person plural pronouns): CEO vs. Reddit quarterly means
# with +/- 1 SE ribbons and a reference line at 2020 Q1 (pandemic onset).
# Fixes: show.legend = TRUE (not the reassignable shorthand T); Reddit layers
# indented consistently with the rest of the chain.
we_decade <- 
  ggplot() +
  geom_line(data = df2, aes(x = Date, y = we_mean, color = "CEO"),
            linetype = 'solid') +
  geom_ribbon(data = df2, aes(x = Date,ymin=we_mean-we_std.error, ymax=we_mean+we_std.error, fill = "CEO"), alpha=0.2) +
  geom_line(data = reddit_tidy, aes(x = Quarter, y = we_mean, color = "Reddit"),
            linetype = 'solid') +
  geom_ribbon(data = reddit_tidy, aes(x = Quarter,ymin=we_mean-we_std.error, ymax=we_mean+we_std.error, fill = "Reddit"), alpha=0.2) +
  ggtitle("We-usage") + 
  labs(x = "Fiscal Quarter", y = '% of Total Words') +
  plot_aes + # shared theme object defined at the top of the file
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + # n controls the number of tick marks
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = TRUE) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))

# Attach effect-size labels (Cohen's d vs. 2010 and 2015) to the
# We-usage panel; labels are parsed as plotmath.
we <- we_decade +
  annotate("text", x = 2011.5, y = 5.25, size = 5, parse = TRUE,
           label = 'paste(italic(d[2010]), " = 0.156")') +
  annotate("text", x = 2015.5, y = 5.25, size = 5, parse = TRUE,
           label = 'paste(italic(d[2015]), " = 0.112")')

# Pin the CEO/Reddit colour and fill mapping for consistency across
# figures, then print the plot.
we <- we +
  scale_colour_manual(values = c(CEO = "dodgerblue3", Reddit = "red")) +
  scale_fill_manual(values = c(CEO = "dodgerblue3", Reddit = "red"))

we

# CEO-only Collective-focus (We-usage) trend: quarterly mean with a +/- 1 SE
# ribbon and a vertical reference line at 2020 Q1 (pandemic onset).
# Fix: show.legend uses TRUE rather than the reassignable shorthand T.
ceo_we_decade <- ggplot(data=df2, aes(x=Date, y=we_mean, group=1)) +
  geom_line(colour = "dodgerblue3") +
  ggtitle("Collective-focus") + 
  plot_aes + # shared theme object defined at the top of the file
  # +/- 1 standard-error band around the quarterly mean
  geom_ribbon(aes(ymin=we_mean-we_std.error, ymax=we_mean+we_std.error),fill = 'dodgerblue3', alpha=0.2) +
  labs(x = "Month", y = '% of total words') +
  scale_x_yearqtr(name="Fiscal Quarter",format = "%YQ%q", n= 5) + # n controls the number of tick marks
  geom_vline(xintercept = as.numeric(as.yearqtr("2020 Q1")), linetype=1, show.legend = TRUE) +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 16)) + 
  theme(axis.text=element_text(size=16),
        axis.title=element_text(size=20,face="bold"))
# Add the decade effect-size labels (Cohen's d vs. 2010 and 2015), parsed
# as plotmath, to the CEO-only Collective-focus panel.
ceo_we_decade <- ceo_we_decade +
  annotate("text", x = 2011.5, y = 5.25, size = 5, parse = TRUE,
           label = 'paste(italic(d[2010]), " = 0.156")') +
  annotate("text", x = 2015.5, y = 5.25, size = 5, parse = TRUE,
           label = 'paste(italic(d[2015]), " = 0.112")')

Tie together

# Arrange the four CEO-vs-Reddit panels in a 2x2 grid with one shared legend
# and a bold overall title.
# Fix: "Reddit" capitalized in the figure title to match the legend labels
# used throughout the figures.
graphs <- ggpubr::ggarrange(analytic, cogproc, i, we, ncol = 2, nrow = 2,
                            common.legend = TRUE, legend = "bottom")
annotate_figure(graphs,
                top = text_grob("CEOs' Language Change over the Decade compared to Reddit data",
                                color = "black", face = "bold", size = 20))

CEO-only graphs

# Arrange the four CEO-only panels in a 2x2 grid with one shared legend
# and a bold overall title.
graphs <- ggpubr::ggarrange(CEO_Analytic_decade, ceo_cogproc_decade,
                            ceo_i_decade, ceo_we_decade,
                            ncol = 2, nrow = 2,
                            common.legend = TRUE, legend = "bottom")
annotate_figure(graphs,
                top = text_grob("CEOs' Language Change over the Decade",
                                color = "black", face = "bold", size = 20))

Package Citations

# Print formatted citations for every attached package (echoed output below).
# NOTE(review): the `report` package is not in the pacman::p_load() call in
# the setup chunk — confirm it is installed before knitting.
report::cite_packages()
##   - Grolemund G, Wickham H (2011). "Dates and Times Made Easy with lubridate." _Journal of Statistical Software_, *40*(3), 1-25. <https://www.jstatsoft.org/v40/i03/>.
##   - J L (2006). "Plotrix: a package in the red light district of R." _R-News_, *6*(4), 8-12.
##   - Kassambara A (2023). _ggpubr: 'ggplot2' Based Publication Ready Plots_. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
##   - Kuhn M (2022). _caret: Classification and Regression Training_. R package version 6.0-93, <https://CRAN.R-project.org/package=caret>.
##   - Lin G (2022). _reactable: Interactive Data Tables Based on 'React Table'_. R package version 0.3.0, <https://CRAN.R-project.org/package=reactable>.
##   - Müller K, Wickham H (2023). _tibble: Simple Data Frames_. R package version 3.2.1, <https://CRAN.R-project.org/package=tibble>.
##   - R Core Team (2022). _R: A Language and Environment for Statistical Computing_. R Foundation for Statistical Computing, Vienna, Austria. <https://www.R-project.org/>.
##   - Rinker TW, Kurkiewicz D (2018). _pacman: Package Management for R_. version 0.5.0, <http://github.com/trinker/pacman>.
##   - Robinson D, Hayes A, Couch S (2023). _broom: Convert Statistical Objects into Tidy Tibbles_. R package version 1.0.4, <https://CRAN.R-project.org/package=broom>.
##   - Sarkar D (2008). _Lattice: Multivariate Data Visualization with R_. Springer, New York. ISBN 978-0-387-75968-5, <http://lmdvr.r-forge.r-project.org>.
##   - Torchiano M (2020). _effsize: Efficient Effect Size Computation_. doi:10.5281/zenodo.1480624 <https://doi.org/10.5281/zenodo.1480624>, R package version 0.8.1, <https://CRAN.R-project.org/package=effsize>.
##   - Wickham H (2016). _ggplot2: Elegant Graphics for Data Analysis_. Springer-Verlag New York. ISBN 978-3-319-24277-4, <https://ggplot2.tidyverse.org>.
##   - Wickham H (2022). _stringr: Simple, Consistent Wrappers for Common String Operations_. R package version 1.5.0, <https://CRAN.R-project.org/package=stringr>.
##   - Wickham H (2023). _forcats: Tools for Working with Categorical Variables (Factors)_. R package version 1.0.0, <https://CRAN.R-project.org/package=forcats>.
##   - Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." _Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
##   - Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A Grammar of Data Manipulation_. R package version 1.1.2, <https://CRAN.R-project.org/package=dplyr>.
##   - Wickham H, Henry L (2023). _purrr: Functional Programming Tools_. R package version 1.0.1, <https://CRAN.R-project.org/package=purrr>.
##   - Wickham H, Hester J, Bryan J (2023). _readr: Read Rectangular Text Data_. R package version 2.1.4, <https://CRAN.R-project.org/package=readr>.
##   - Wickham H, Vaughan D, Girlich M (2023). _tidyr: Tidy Messy Data_. R package version 1.3.0, <https://CRAN.R-project.org/package=tidyr>.
##   - Zeileis A, Grothendieck G (2005). "zoo: S3 Infrastructure for Regular and Irregular Time Series." _Journal of Statistical Software_, *14*(6), 1-27. doi:10.18637/jss.v014.i06 <https://doi.org/10.18637/jss.v014.i06>.
##   - Zhu H (2021). _kableExtra: Construct Complex Table with 'kable' and Pipe Syntax_. R package version 1.3.4, <https://CRAN.R-project.org/package=kableExtra>.